3 files changed, 18 insertions(+), 8 deletions(-)

@@ -62,7 +62,7 @@ fn main() -> Result<()> {
         ..LlamaContextParams::default()
     };

-    let mut ctx = model.new_context(&backend, &ctx_params)
+    let mut ctx = model.new_context(&backend, ctx_params)
         .with_context(|| "unable to create the llama_context")?;

     // tokenize the prompt
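Note the call-site change: `ctx_params` is now passed by value and moved into `new_context`. Because the next file's diff also drops the `Clone` and `Copy` derives, the params cannot be reused after the call; a caller that wants several contexts has to rebuild them each time. A minimal sketch (the closure is illustrative, not part of the crate):

    // Params are consumed per call; rebuild them for each context.
    let make_params = || LlamaContextParams {
        embedding: true, // any field of the struct shown below works here
        ..LlamaContextParams::default()
    };
    let ctx_a = model.new_context(&backend, make_params())?;
    let ctx_b = model.new_context(&backend, make_params())?;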
@@ -43,7 +43,7 @@ impl From<RopeScalingType> for i8 {
 }

 /// A safe wrapper around `llama_context_params`.
-#[derive(Debug, Clone, Copy, PartialEq)]
+#[derive(Debug, PartialEq)]
 #[allow(
     missing_docs,
     clippy::struct_excessive_bools,
@@ -71,6 +71,8 @@ pub struct LlamaContextParams {
     pub logits_all: bool,
     pub embedding: bool,
     pub offload_kqv: bool,
+    pub cb_eval: llama_cpp_sys_2::ggml_backend_sched_eval_callback,
+    pub cb_eval_user_data: *mut std::ffi::c_void,
 }

 /// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)
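The two new fields surface ggml's scheduler eval callback through the safe wrapper. A minimal sketch of installing an observer, assuming `llama_cpp_sys_2` exposes the bindgen form of the upstream ggml signature, `Option<unsafe extern "C" fn(*mut ggml_tensor, bool, *mut c_void) -> bool>` (check the generated bindings; `observe_eval` is a hypothetical name):

    use std::ffi::c_void;

    // ggml calls this twice per scheduled tensor: first with `ask == true`
    // to ask whether we want to observe it, then again with its computed data.
    unsafe extern "C" fn observe_eval(
        tensor: *mut llama_cpp_sys_2::ggml_tensor,
        ask: bool,
        _user_data: *mut c_void,
    ) -> bool {
        if ask {
            return true; // yes, report this tensor back to us
        }
        // Inspect `tensor` here; return true so evaluation continues.
        !tensor.is_null()
    }

    let ctx_params = LlamaContextParams {
        cb_eval: Some(observe_eval),
        cb_eval_user_data: std::ptr::null_mut(),
        ..LlamaContextParams::default()
    };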
@@ -97,6 +99,8 @@ impl From<llama_context_params> for LlamaContextParams {
             n_threads_batch,
             rope_freq_base,
             rope_freq_scale,
+            cb_eval,
+            cb_eval_user_data,
             type_k,
             type_v,
             mul_mat_q,
@@ -131,6 +135,8 @@ impl From<llama_context_params> for LlamaContextParams {
             yarn_beta_slow,
             yarn_orig_ctx,
             offload_kqv,
+            cb_eval,
+            cb_eval_user_data,
         }
     }
 }
@@ -157,6 +163,8 @@ impl From<LlamaContextParams> for llama_context_params {
             yarn_beta_slow,
             yarn_orig_ctx,
             offload_kqv,
+            cb_eval,
+            cb_eval_user_data,
         }: LlamaContextParams,
     ) -> Self {
         llama_context_params {
@@ -179,6 +187,8 @@ impl From<LlamaContextParams> for llama_context_params {
             yarn_beta_slow,
             yarn_orig_ctx,
             offload_kqv,
+            cb_eval,
+            cb_eval_user_data,
         }
     }
-}
+}
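Both `From` impls destructure exhaustively, so adding the fields on each side is compiler-enforced: omitting either would be a build error. A small round-trip sketch (assuming, as llama.cpp's `llama_context_default_params` suggests, that the default leaves the callback unset):

    let sys: llama_cpp_sys_2::llama_context_params = LlamaContextParams::default().into();
    let back = LlamaContextParams::from(sys);
    // Function pointers and raw pointers compare by address, so the derived
    // PartialEq above still covers the two new fields.
    assert!(back.cb_eval.is_none());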
@@ -309,12 +309,12 @@ impl LlamaModel {
     /// # Errors
     ///
     /// There are many ways this can fail. See [`LlamaContextLoadError`] for more information.
-    pub fn new_context<'a>(
-        &'a self,
+    pub fn new_context(
+        &self,
         _: &LlamaBackend,
-        params: &LlamaContextParams,
-    ) -> Result<LlamaContext<'a>, LlamaContextLoadError> {
-        let context_params = llama_context_params::from(*params);
+        params: LlamaContextParams,
+    ) -> Result<LlamaContext, LlamaContextLoadError> {
+        let context_params = llama_context_params::from(params);
         let context = unsafe {
             llama_cpp_sys_2::llama_new_context_with_model(self.model.as_ptr(), context_params)
         };
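Two things change together here: `params` is taken by value, so the conversion no longer needs the `*params` copy that the removed `Copy` derive used to permit, and the explicit `'a` is replaced by lifetime elision. The returned context still borrows the model through `&self`; a sketch making the elided lifetime explicit (`make_ctx` is an illustrative helper, not crate API):

    fn make_ctx<'a>(
        model: &'a LlamaModel,
        backend: &LlamaBackend,
    ) -> Result<LlamaContext<'a>, LlamaContextLoadError> {
        // The context cannot outlive `model`, exactly as the old `<'a>` form spelled out.
        model.new_context(backend, LlamaContextParams::default())
    }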