Commit e3396f3

sampling : change _cp/copy to clone

1 parent b85edd7 commit e3396f3

9 files changed, +50 -51 lines

common/sampling.cpp

Lines changed: 5 additions & 5 deletions

@@ -114,13 +114,13 @@ void gpt_sampler_free(struct gpt_sampler * gsmpl) {
     }
 }
 
-struct gpt_sampler * gpt_sampler_cp(gpt_sampler * gsmpl) {
+struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl) {
     return new gpt_sampler {
         /* .params = */ gsmpl->params,
-        /* .bias = */ llama_constraint_cp(gsmpl->bias),
-        /* .pnlt = */ llama_constraint_cp(gsmpl->pnlt),
-        /* .grmr = */ llama_constraint_cp(gsmpl->grmr),
-        /* .smpl = */ llama_sampler_cp (gsmpl->smpl)
+        /* .bias = */ llama_constraint_clone(gsmpl->bias),
+        /* .pnlt = */ llama_constraint_clone(gsmpl->pnlt),
+        /* .grmr = */ llama_constraint_clone(gsmpl->grmr),
+        /* .smpl = */ llama_sampler_clone (gsmpl->smpl)
     };
 }
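As the hunk shows, gpt_sampler_clone produces a deep copy: the bias, penalty and grammar constraints as well as the underlying llama_sampler are all cloned, so the copy can diverge from the original. A minimal usage sketch with the renamed API; the fork_sampler helper and the accepted token are illustrative, not part of this commit:

// Hypothetical helper, shown only to illustrate the renamed API.
static struct gpt_sampler * fork_sampler(struct gpt_sampler * smpl, llama_token accepted) {
    struct gpt_sampler * branch = gpt_sampler_clone(smpl);        // deep copy of bias/pnlt/grmr/smpl
    gpt_sampler_accept(branch, accepted, /*apply_grammar=*/true); // advance only the branch
    return branch;                                                // caller releases it with gpt_sampler_free()
}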

common/sampling.h

Lines changed: 1 addition & 1 deletion

@@ -68,7 +68,7 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st
 
 void gpt_sampler_free(struct gpt_sampler * gsmpl);
 
-struct gpt_sampler * gpt_sampler_cp(gpt_sampler * gsmpl);
+struct gpt_sampler * gpt_sampler_clone(gpt_sampler * gsmpl);
 
 void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool apply_grammar);
 void gpt_sampler_reset (struct gpt_sampler * gsmpl);

examples/speculative/speculative.cpp

Lines changed: 2 additions & 2 deletions

@@ -451,7 +451,7 @@ int main(int argc, char ** argv) {
         if (drafts[0].smpl) {
             gpt_sampler_free(drafts[0].smpl);
         }
-        drafts[0].smpl = gpt_sampler_cp(smpl);
+        drafts[0].smpl = gpt_sampler_clone(smpl);
 
         int n_seq_cur = 1;
         int n_past_cur = n_past_dft;
@@ -523,7 +523,7 @@ int main(int argc, char ** argv) {
                 if (drafts[n_seq_cur].smpl) {
                     gpt_sampler_free(drafts[n_seq_cur].smpl);
                 }
-                drafts[n_seq_cur].smpl = gpt_sampler_cp(drafts[s].smpl);
+                drafts[n_seq_cur].smpl = gpt_sampler_clone(drafts[s].smpl);
 
                 sa.push_back(n_seq_cur);

include/llama.h

Lines changed: 14 additions & 3 deletions

@@ -1030,7 +1030,7 @@ extern "C" {
         void (*accept)( struct llama_constraint * cnstr, llama_token token); // can be NULL
         void (*apply) ( struct llama_constraint * cnstr, llama_token_data_array * cur_p); // required
         void (*reset) ( struct llama_constraint * cnstr); // can be NULL
-        struct llama_constraint * (*copy) (const struct llama_constraint * cnstr); // can be NULL if ctx is NULL
+        struct llama_constraint * (*clone) (const struct llama_constraint * cnstr); // can be NULL if ctx is NULL
         void (*free) ( struct llama_constraint * cnstr); // can be NULL if ctx is NULL
 
         // TODO: API for internal libllama usage for appending the sampling to an existing ggml_cgraph
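As the comments note, the clone callback (formerly copy) may be left NULL only for constraints whose ctx is NULL; a stateful constraint must return a fresh, independent instance. A minimal sketch of such a callback; my_ctx, my_param and my_constraint_init are hypothetical stand-ins for a real back-end's context struct and factory, not part of this commit:

// Hypothetical clone callback matching the (*clone) member above.
static struct llama_constraint * my_clone(const struct llama_constraint * cnstr) {
    const auto * ctx = (const my_ctx *) cnstr->ctx; // read-only view of the source state (my_ctx is hypothetical)
    return my_constraint_init(ctx->my_param);       // build an independent constraint from that state
}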
@@ -1051,11 +1051,22 @@ extern "C" {
     LLAMA_API struct llama_constraint * llama_constraint_init_temp (float t);
     LLAMA_API struct llama_constraint * llama_constraint_init_temp_ext (float t, float delta, float exponent);
 
+    /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
+    /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
+    /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
+    /// @param eta The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates.
+    /// @param m The number of tokens considered in the estimation of `s_hat`. This is an arbitrary value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`. In the paper, they use `m = 100`, but you can experiment with different values to see how it affects the performance of the algorithm.
+    /// @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
     LLAMA_API struct llama_constraint * llama_constraint_init_mirostat(
             const struct llama_model * model,
                                float   tau,
                                float   eta);
 
+    /// @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
+    /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
+    /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
+    /// @param eta The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates.
+    /// @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
     LLAMA_API struct llama_constraint * llama_constraint_init_mirostat_v2(
                                float   tau,
                                float   eta);
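A short usage sketch for the constructors documented above; smpl and the exact signature of llama_sampler_constraint_add (which this header only mentions in a comment further down) are assumptions, not taken from this diff:

// tau = 5.0f targets moderately surprising text; eta = 0.1f controls how quickly mu adapts.
struct llama_constraint * mirostat = llama_constraint_init_mirostat_v2(5.0f, 0.1f);
llama_sampler_constraint_add(smpl, mirostat); // the sampler takes ownership; do not call llama_constraint_free on it afterwards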
@@ -1079,7 +1090,7 @@ extern "C" {
                      int32_t   n_logit_bias,
             const llama_logit_bias * logit_bias);
 
-    LLAMA_API struct llama_constraint * llama_constraint_cp(const struct llama_constraint * cnstr);
+    LLAMA_API struct llama_constraint * llama_constraint_clone(const struct llama_constraint * cnstr);
 
     // important: do not call if the constraint has been added to a llama_sampler (via llama_sampler_constraint_add)
     LLAMA_API void llama_constraint_free(struct llama_constraint * cnstr);
@@ -1092,7 +1103,7 @@ extern "C" {
 
     LLAMA_API struct llama_sampler * llama_sampler_init (const struct llama_model * model, struct llama_sampler_params params);
     LLAMA_API void llama_sampler_free ( struct llama_sampler * smpl);
-    LLAMA_API struct llama_sampler * llama_sampler_cp (const struct llama_sampler * smpl);
+    LLAMA_API struct llama_sampler * llama_sampler_clone (const struct llama_sampler * smpl);
     LLAMA_API void llama_sampler_reset ( struct llama_sampler * smpl);
     LLAMA_API void llama_sampler_accept( struct llama_sampler * smpl, llama_token token);
     LLAMA_API void llama_sampler_apply ( struct llama_sampler * smpl, llama_token_data_array * cur_p);
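Together these declarations support a simple branch-and-discard pattern at the public API level. A minimal sketch, assuming smpl is an initialized llama_sampler, id is a llama_token and cur_p is a llama_token_data_array prepared by the caller (error handling omitted):

struct llama_sampler * branch = llama_sampler_clone(smpl); // deep copy, including per-constraint state
llama_sampler_accept(branch, id);                          // advance only the branch
llama_sampler_apply (branch, &cur_p);                      // filter the candidates with the branch
llama_sampler_free  (branch);                              // the original smpl is left untouched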

src/llama-grammar.cpp

Lines changed: 1 addition & 1 deletion

@@ -1050,7 +1050,7 @@ void llama_grammar_free_impl(struct llama_grammar * grammar) {
     delete grammar;
 }
 
-struct llama_grammar * llama_grammar_cp_impl(const struct llama_grammar & grammar) {
+struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
     llama_grammar * result = new llama_grammar { grammar.vocab, grammar.rules, grammar.stacks, grammar.partial_utf8, };
 
     // redirect elements in stacks to point to new rules

src/llama-grammar.h

Lines changed: 1 addition & 1 deletion

@@ -131,7 +131,7 @@ struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab,
 
 void llama_grammar_free_impl(struct llama_grammar * grammar);
 
-struct llama_grammar * llama_grammar_cp_impl(const struct llama_grammar & grammar);
+struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar);
 
 // TODO: move the API below as member functions of llama_grammar
 void llama_grammar_apply_impl(

src/llama-sampling.cpp

Lines changed: 20 additions & 20 deletions

@@ -433,7 +433,7 @@ static struct llama_constraint_i llama_constraint_softmax_i = {
         llama_constraint_softmax_impl(cur_p);
     },
     /* .reset = */ nullptr,
-    /* .copy = */ nullptr,
+    /* .clone = */ nullptr,
     /* .free = */ nullptr,
 };
 
@@ -458,7 +458,7 @@ static struct llama_constraint_i llama_constraint_top_k_i = {
         llama_constraint_top_k_impl(cur_p, ctx->k);
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_top_k *) cnstr->ctx;
         return llama_constraint_init_top_k_impl(ctx->k);
     },
@@ -491,7 +491,7 @@ static struct llama_constraint_i llama_constraint_top_p_i = {
         llama_constraint_top_p_impl(cur_p, ctx->p, ctx->min_keep);
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_top_p *) cnstr->ctx;
         return llama_constraint_init_top_p_impl(ctx->p, ctx->min_keep);
     },
@@ -525,7 +525,7 @@ static struct llama_constraint_i llama_constraint_min_p_i = {
         llama_constraint_min_p_impl(cur_p, ctx->p, ctx->min_keep);
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_min_p *) cnstr->ctx;
         return llama_constraint_init_min_p_impl(ctx->p, ctx->min_keep);
     },
@@ -559,7 +559,7 @@ static struct llama_constraint_i llama_constraint_tail_free_i = {
         llama_constraint_tail_free_impl(cur_p, ctx->z, ctx->min_keep);
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_tail_free *) cnstr->ctx;
         return llama_constraint_init_tail_free_impl(ctx->z, ctx->min_keep);
     },
@@ -593,7 +593,7 @@ static struct llama_constraint_i llama_constraint_typical_i = {
         llama_constraint_typical_impl(cur_p, ctx->p, ctx->min_keep);
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_typical *) cnstr->ctx;
         return llama_constraint_init_typical_impl(ctx->p, ctx->min_keep);
     },
@@ -626,7 +626,7 @@ static struct llama_constraint_i llama_constraint_temp_i = {
         llama_constraint_temp_impl(cur_p, ctx->temp);
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_temp *) cnstr->ctx;
         return llama_constraint_init_temp_impl(ctx->temp);
     },
@@ -667,7 +667,7 @@ static struct llama_constraint_i llama_constraint_temp_ext_i = {
         }
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_temp_ext *) cnstr->ctx;
         return llama_constraint_init_temp_ext_impl(ctx->temp, ctx->delta, ctx->exponent);
     },
@@ -754,7 +754,7 @@ static struct llama_constraint_i llama_constraint_mirostat_i = {
         auto * ctx = (llama_constraint_context_mirostat *) cnstr->ctx;
         ctx->mu = 2.0f*ctx->tau;
     },
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_mirostat *) cnstr->ctx;
         return llama_constraint_init_mirostat_impl(*ctx->vocab, ctx->tau, ctx->eta, ctx->m);
     },
@@ -834,7 +834,7 @@ static struct llama_constraint_i llama_constraint_mirostat_v2_i = {
         auto * ctx = (llama_constraint_context_mirostat_v2 *) cnstr->ctx;
         ctx->mu = 2.0f*ctx->tau;
     },
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx = (const llama_constraint_context_mirostat_v2 *) cnstr->ctx;
         return llama_constraint_init_mirostat_v2_impl(ctx->tau, ctx->eta);
     },
@@ -891,7 +891,7 @@ static struct llama_constraint_i llama_constraint_grammar_i = {
         llama_grammar_free_impl(ctx->grammar);
         ctx->grammar = grammar_new;
     },
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx_src = (const llama_constraint_context_grammar *) cnstr->ctx;
 
         auto * result = llama_constraint_init_grammar_impl(*ctx_src->vocab, nullptr, nullptr);
@@ -901,7 +901,7 @@ static struct llama_constraint_i llama_constraint_grammar_i = {
             ctx_dst->grammar_str = ctx_src->grammar_str;
             ctx_dst->grammar_root = ctx_src->grammar_root;
 
-            ctx_dst->grammar = llama_grammar_cp_impl(*ctx_src->grammar);
+            ctx_dst->grammar = llama_grammar_clone_impl(*ctx_src->grammar);
         }
 
         return result;
@@ -998,7 +998,7 @@ static struct llama_constraint_i llama_constraint_penalties_i = {
         auto * ctx = (llama_constraint_context_penalties *) cnstr->ctx;
         ctx->prev.clear();
     },
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx_src = (const llama_constraint_context_penalties *) cnstr->ctx;
         auto * result = llama_constraint_init_penalties_impl(
             *ctx_src->vocab,
@@ -1059,7 +1059,7 @@ static struct llama_constraint_i llama_constraint_logit_bias_i = {
         }
     },
     /* .reset = */ nullptr,
-    /* .copy = */ [](const struct llama_constraint * cnstr) {
+    /* .clone = */ [](const struct llama_constraint * cnstr) {
         const auto * ctx_src = (const llama_constraint_context_logit_bias *) cnstr->ctx;
         return llama_constraint_init_logit_bias_impl(*ctx_src->vocab, ctx_src->logit_bias.size(), ctx_src->logit_bias.data());
     },
@@ -1083,8 +1083,8 @@ struct llama_constraint * llama_constraint_init_logit_bias_impl(
 
 ////////////////////////////////////////
 
-struct llama_constraint * llama_constraint_cp_impl(const struct llama_constraint & cnstr) {
-    return cnstr.iface->copy ? cnstr.iface->copy(&cnstr) : nullptr;
+struct llama_constraint * llama_constraint_clone_impl(const struct llama_constraint & cnstr) {
+    return cnstr.iface->clone ? cnstr.iface->clone(&cnstr) : nullptr;
 }
 
 void llama_constraint_free_impl(struct llama_constraint * cnstr) {
@@ -1148,7 +1148,7 @@ void llama_sampler_free_impl(struct llama_sampler * smpl) {
     delete smpl;
 }
 
-struct llama_sampler * llama_sampler_cp_impl(const struct llama_sampler & smpl) {
+struct llama_sampler * llama_sampler_clone_impl(const struct llama_sampler & smpl) {
     auto * result = new llama_sampler {
         /* .params = */ smpl.params,
         /* .vocab = */ smpl.vocab,
@@ -1163,7 +1163,7 @@ struct llama_sampler * llama_sampler_cp_impl(const struct llama_sampler & smpl)
         /* .n_sample = */ 0,
     };
 
-    // copy the constraints objects
+    // clone the constraints objects
     result->constraints.clear();
     for (const auto & cnstr : smpl.constraints) {
         if (cnstr->ctx == nullptr) {
@@ -1172,8 +1172,8 @@ struct llama_sampler * llama_sampler_cp_impl(const struct llama_sampler & smpl)
                 /* .ctx = */ nullptr,
             });
         } else {
-            GGML_ASSERT(cnstr->iface->copy);
-            result->constraints.push_back(cnstr->iface->copy(cnstr));
+            GGML_ASSERT(cnstr->iface->clone);
+            result->constraints.push_back(cnstr->iface->clone(cnstr));
         }
     }
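Behavioral summary of the dispatch above, written against the public API; llama_constraint_clone is assumed to forward to llama_constraint_clone_impl, and c stands for an existing constraint pointer:

struct llama_constraint * copy = llama_constraint_clone(c);
if (copy == nullptr) {
    // c's back-end left .clone unset, which is only allowed when c->ctx is NULL (stateless constraint)
} else {
    // independent copy; free it with llama_constraint_free(copy) unless it is handed
    // to a sampler via llama_sampler_constraint_add, in which case the sampler owns it
}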
