Skip to content

Commit b3117d6

Browse files
committed
params : remove is_same()
ggml-ci
1 parent 1a59845 commit b3117d6

File tree

6 files changed

+32
-71
lines changed

6 files changed

+32
-71
lines changed

src/llama-batch.h

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -34,31 +34,6 @@ struct llama_ubatch {
     llama_seq_id * seq_id_unq; // [n_seqs_unq]    | s | seq_id
     int32_t      * seq_idx;    // [LLAMA_MAX_SEQ] | - | seq_idx
     int8_t       * output;     // [n_tokens]      | i | -
-
-    bool is_same(const llama_ubatch & other) const {
-        bool res =
-            equal_seqs   == other.equal_seqs &&
-            n_tokens     == other.n_tokens &&
-            n_seq_tokens == other.n_seq_tokens &&
-            n_seqs       == other.n_seqs &&
-            n_seqs_unq   == other.n_seqs_unq &&
-            (
-                (!token && !other.token) ||
-                (!embd  && !other.embd)
-            );
-
-        if (!res) {
-            return false;
-        }
-
-        // TODO: this won't work because seq_id_unq ptr can point to an old balloc that has
-        //       been freed by this point. find a way to fix this
-        //for (uint32_t s = 0; s < n_seqs_unq; ++s) {
-        //    res &= seq_id_unq[s] == other.seq_id_unq[s];
-        //}
-
-        return res;
-    }
 };

 // a helper for sanitizing, fulfilling and splitting a batch

src/llama-cparams.cpp

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,3 @@
 size_t llama_max_parallel_sequences(void) {
     return LLAMA_MAX_SEQ;
 }
-
-bool llama_cparams::is_same(const llama_cparams & other) const {
-    return
-        n_ctx             == other.n_ctx &&
-        n_batch           == other.n_batch &&
-        n_ubatch          == other.n_ubatch &&
-        n_seq_max         == other.n_seq_max &&
-        n_threads         == other.n_threads &&
-        n_threads_batch   == other.n_threads_batch &&
-        rope_freq_base    == other.rope_freq_base &&
-        rope_freq_scale   == other.rope_freq_scale &&
-        n_ctx_orig_yarn   == other.n_ctx_orig_yarn &&
-        yarn_ext_factor   == other.yarn_ext_factor &&
-        yarn_attn_factor  == other.yarn_attn_factor &&
-        yarn_beta_fast    == other.yarn_beta_fast &&
-        yarn_beta_slow    == other.yarn_beta_slow &&
-        defrag_thold      == other.defrag_thold &&
-        embeddings        == other.embeddings &&
-        causal_attn       == other.causal_attn &&
-        offload_kqv       == other.offload_kqv &&
-        flash_attn        == other.flash_attn &&
-        no_perf           == other.no_perf &&
-        warmup            == other.warmup &&
-        op_offload        == other.op_offload &&
-        pooling_type      == other.pooling_type &&
-        cb_eval           == other.cb_eval &&
-        cb_eval_user_data == other.cb_eval_user_data;
-}

src/llama-cparams.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,4 @@ struct llama_cparams {

     ggml_backend_sched_eval_callback cb_eval;
     void * cb_eval_user_data;
-
-    bool is_same(const llama_cparams & other) const;
 };

src/llama-graph.h

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -421,11 +421,38 @@ struct llm_graph_params {
     // TODO: temporary
     llm_graph_result_i * res;

-    bool is_same(const llm_graph_params & other) const {
+    // return true if the "other" params would result in a graph with the same topology as with the current params
+    // having the same topology allows us to reuse the graph in some cases
+    bool allow_reuse(const llm_graph_params & other) const {
+        // first check the ubatch
+        bool can_reuse_ubatch =
+            ubatch.equal_seqs   == other.ubatch.equal_seqs &&
+            ubatch.n_tokens     == other.ubatch.n_tokens &&
+            ubatch.n_seq_tokens == other.ubatch.n_seq_tokens &&
+            ubatch.n_seqs       == other.ubatch.n_seqs &&
+            ubatch.n_seqs_unq   == other.ubatch.n_seqs_unq &&
+            (
+                (!ubatch.token && !other.ubatch.token) ||
+                (!ubatch.embd  && !other.ubatch.embd)
+            );
+
+        // TODO: this won't work because seq_id_unq ptr can point to an old balloc that has
+        //       been freed by this point. find a way to fix this
+        //for (uint32_t s = 0; s < n_seqs_unq; ++s) {
+        //    can_reuse_ubatch &= seq_id_unq[s] == other.seq_id_unq[s];
+        //}
+
+        // for now conservatively disallow, until the issue above is resolved
+        // ref: https://github.com/ggml-org/llama.cpp/pull/14363
+        can_reuse_ubatch = can_reuse_ubatch && !ubatch.equal_seqs;
+
+        if (!can_reuse_ubatch) {
+            return false;
+        }
+
         return
-            hparams.is_same(other.hparams) &&
-            cparams.is_same(other.cparams) &&
-            ubatch .is_same(other.ubatch)  &&
+            cparams.embeddings  == other.cparams.embeddings  &&
+            cparams.causal_attn == other.cparams.causal_attn &&
             arch  == other.arch  &&
             gtype == other.gtype &&
             cvec  == other.cvec  &&
@@ -488,7 +515,7 @@ class llm_graph_result : public llm_graph_result_i {
     // contexts of the input tensors of the graph and we can reuse it for another computation
     // return true if the graph was updated and can be reused
     bool can_reuse(const llm_graph_params & params) override {
-        if (!this->params.is_same(params)) {
+        if (!this->params.allow_reuse(params)) {
             return false;
         }

src/llama-hparams.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,3 @@ bool llama_hparams::is_swa(uint32_t il) const {

     GGML_ABORT("fatal error");
 }
-
-bool llama_hparams::is_same(const llama_hparams & other) const {
-    return
-        n_ctx_train   == other.n_ctx_train &&
-        n_embd        == other.n_embd &&
-        n_layer       == other.n_layer &&
-        n_expert      == other.n_expert &&
-        n_expert_used == other.n_expert_used;
-}

src/llama-hparams.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,6 @@ struct llama_hparams {
     uint32_t n_pos_per_embd() const;

     bool is_swa(uint32_t il) const;
-
-    bool is_same(const llama_hparams & other) const;
 };

 static_assert(std::is_trivially_copyable<llama_hparams>::value, "llama_hparams must be trivially copyable");

0 commit comments

Comments
 (0)