Skip to content

Commit b3117d6

Browse files
committed
params : remove is_same()
ggml-ci
1 parent 1a59845 commit b3117d6

File tree

6 files changed

+32
-71
lines changed

6 files changed

+32
-71
lines changed

src/llama-batch.h

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -34,31 +34,6 @@ struct llama_ubatch {
     llama_seq_id * seq_id_unq; // [n_seqs_unq]    | s | seq_id
     int32_t      * seq_idx;    // [LLAMA_MAX_SEQ] | - | seq_idx
     int8_t       * output;     // [n_tokens]      | i | -
-
-    bool is_same(const llama_ubatch & other) const {
-        bool res =
-            equal_seqs   == other.equal_seqs &&
-            n_tokens     == other.n_tokens &&
-            n_seq_tokens == other.n_seq_tokens &&
-            n_seqs       == other.n_seqs &&
-            n_seqs_unq   == other.n_seqs_unq &&
-            (
-                (!token && !other.token) ||
-                (!embd  && !other.embd)
-            );
-
-        if (!res) {
-            return false;
-        }
-
-        // TODO: this won't work because seq_id_unq ptr can point to an old balloc that has
-        //       been freed by this point. find a way to fix this
-        //for (uint32_t s = 0; s < n_seqs_unq; ++s) {
-        //    res &= seq_id_unq[s] == other.seq_id_unq[s];
-        //}
-
-        return res;
-    }
 };

 // a helper for sanitizing, fulfilling and splitting a batch

src/llama-cparams.cpp

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,3 @@
 size_t llama_max_parallel_sequences(void) {
     return LLAMA_MAX_SEQ;
 }
-
-bool llama_cparams::is_same(const llama_cparams & other) const {
-    return
-        n_ctx             == other.n_ctx &&
-        n_batch           == other.n_batch &&
-        n_ubatch          == other.n_ubatch &&
-        n_seq_max         == other.n_seq_max &&
-        n_threads         == other.n_threads &&
-        n_threads_batch   == other.n_threads_batch &&
-        rope_freq_base    == other.rope_freq_base &&
-        rope_freq_scale   == other.rope_freq_scale &&
-        n_ctx_orig_yarn   == other.n_ctx_orig_yarn &&
-        yarn_ext_factor   == other.yarn_ext_factor &&
-        yarn_attn_factor  == other.yarn_attn_factor &&
-        yarn_beta_fast    == other.yarn_beta_fast &&
-        yarn_beta_slow    == other.yarn_beta_slow &&
-        defrag_thold      == other.defrag_thold &&
-        embeddings        == other.embeddings &&
-        causal_attn       == other.causal_attn &&
-        offload_kqv       == other.offload_kqv &&
-        flash_attn        == other.flash_attn &&
-        no_perf           == other.no_perf &&
-        warmup            == other.warmup &&
-        op_offload        == other.op_offload &&
-        pooling_type      == other.pooling_type &&
-        cb_eval           == other.cb_eval &&
-        cb_eval_user_data == other.cb_eval_user_data;
-}

src/llama-cparams.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,4 @@ struct llama_cparams {

     ggml_backend_sched_eval_callback cb_eval;
     void * cb_eval_user_data;
-
-    bool is_same(const llama_cparams & other) const;
 };

src/llama-graph.h

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -421,11 +421,38 @@ struct llm_graph_params {
     // TODO: temporary
     llm_graph_result_i * res;

-    bool is_same(const llm_graph_params & other) const {
+    // return true if the "other" params would result in a graph with the same topology as with the current params
+    // having the same topology allows us to reuse the graph in some cases
+    bool allow_reuse(const llm_graph_params & other) const {
+        // first check the ubatch
+        bool can_reuse_ubatch =
+            ubatch.equal_seqs   == other.ubatch.equal_seqs &&
+            ubatch.n_tokens     == other.ubatch.n_tokens &&
+            ubatch.n_seq_tokens == other.ubatch.n_seq_tokens &&
+            ubatch.n_seqs       == other.ubatch.n_seqs &&
+            ubatch.n_seqs_unq   == other.ubatch.n_seqs_unq &&
+            (
+                (!ubatch.token && !other.ubatch.token) ||
+                (!ubatch.embd  && !other.ubatch.embd)
+            );
+
+        // TODO: this won't work because seq_id_unq ptr can point to an old balloc that has
+        //       been freed by this point. find a way to fix this
+        //for (uint32_t s = 0; s < n_seqs_unq; ++s) {
+        //    can_reuse_ubatch &= seq_id_unq[s] == other.seq_id_unq[s];
+        //}
+
+        // for now conservatively disallow, until the issue above is resolved
+        // ref: https://github.com/ggml-org/llama.cpp/pull/14363
+        can_reuse_ubatch = can_reuse_ubatch && !ubatch.equal_seqs;
+
+        if (!can_reuse_ubatch) {
+            return false;
+        }
+
         return
-            hparams.is_same(other.hparams) &&
-            cparams.is_same(other.cparams) &&
-            ubatch .is_same(other.ubatch)  &&
+            cparams.embeddings  == other.cparams.embeddings  &&
+            cparams.causal_attn == other.cparams.causal_attn &&
             arch  == other.arch  &&
             gtype == other.gtype &&
             cvec  == other.cvec  &&
@@ -488,7 +515,7 @@ class llm_graph_result : public llm_graph_result_i {
     // contexts of the input tensors of the graph and we can reuse it for another computation
     // return true if the graph was updated and can be reused
     bool can_reuse(const llm_graph_params & params) override {
-        if (!this->params.is_same(params)) {
+        if (!this->params.allow_reuse(params)) {
             return false;
         }

src/llama-hparams.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,3 @@ bool llama_hparams::is_swa(uint32_t il) const {

     GGML_ABORT("fatal error");
 }
-
-bool llama_hparams::is_same(const llama_hparams & other) const {
-    return
-        n_ctx_train   == other.n_ctx_train &&
-        n_embd        == other.n_embd &&
-        n_layer       == other.n_layer &&
-        n_expert      == other.n_expert &&
-        n_expert_used == other.n_expert_used;
-}

src/llama-hparams.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,6 @@ struct llama_hparams {
     uint32_t n_pos_per_embd() const;

     bool is_swa(uint32_t il) const;
-
-    bool is_same(const llama_hparams & other) const;
 };

 static_assert(std::is_trivially_copyable<llama_hparams>::value, "llama_hparams must be trivially copyable");

0 commit comments

Comments
 (0)