Commit 301c4e2

Merge pull request #488 from thewh1teagle/thewh1teagle/patch-1
Thewh1teagle/patch 1
2 parents 951afc3 + 5da6a85

10 files changed (+58, -68 lines)

Cargo.lock

Lines changed: 1 addition & 8 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/usage", "examples/simple"]
+members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/simple"]
 
 [workspace.dependencies]
 # core library deps

examples/usage/src/main.rs renamed to examples/usage.rs

Lines changed: 10 additions & 10 deletions
@@ -1,29 +1,30 @@
 //! # Usage
-//! 
+//!
 //! This is just about the smallest possible way to do inference. To fetch a model from hugging face:
-//! 
-//! ```bash
+//!
+//! ```console
 //! git clone --recursive https://github.com/utilityai/llama-cpp-rs
 //! cd llama-cpp-rs/examples/usage
 //! wget https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GGUF/resolve/main/qwen2-1_5b-instruct-q4_0.gguf
-//! cargo run --bin usage -- qwen2-1_5b-instruct-q4_0.gguf
+//! cargo run --example usage -- qwen2-1_5b-instruct-q4_0.gguf
 //! ```
-use std::io::Write;
 use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::LlamaModelParams;
 use llama_cpp_2::model::LlamaModel;
 use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::token::data_array::LlamaTokenDataArray;
+use std::io::Write;
 
 #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
 fn main() {
     let model_path = std::env::args().nth(1).expect("Please specify model path");
     let backend = LlamaBackend::init().unwrap();
     let params = LlamaModelParams::default();
 
-    let prompt = "<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
+    let prompt =
+        "<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
     LlamaContextParams::default();
     let model =
         LlamaModel::load_from_file(&backend, model_path, &params).expect("unable to load model");
@@ -48,14 +49,11 @@ fn main() {
     }
     ctx.decode(&mut batch).expect("llama_decode() failed");
 
-
     let mut n_cur = batch.n_tokens();
 
-
     // The `Decoder`
     let mut decoder = encoding_rs::UTF_8.new_decoder();
 
-
     while n_cur <= n_len {
         // sample the next token
         {
@@ -72,7 +70,9 @@ fn main() {
             break;
         }
 
-        let output_bytes = model.token_to_bytes(new_token_id, Special::Tokenize).unwrap();
+        let output_bytes = model
+            .token_to_bytes(new_token_id, Special::Tokenize)
+            .unwrap();
         // use `Decoder.decode_to_string()` to avoid the intermediate buffer
         let mut output_string = String::with_capacity(32);
         let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);
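
The renamed example streams token bytes through an encoding_rs decoder (decode_to_string) rather than collecting them into an intermediate buffer first. Below is a minimal, self-contained sketch of that pattern, assuming encoding_rs is available as a dependency (the PR adds it under dev-dependencies); the byte values are made up for illustration. The decoder keeps state between calls, so a multi-byte UTF-8 character split across two token chunks still decodes correctly.

fn main() {
    let mut decoder = encoding_rs::UTF_8.new_decoder();
    let mut out = String::with_capacity(32);

    // "é" is 0xC3 0xA9 in UTF-8; feed it split across two calls, the way
    // token output from the model might arrive.
    let _ = decoder.decode_to_string(&[0xC3], &mut out, false);
    let _ = decoder.decode_to_string(&[0xA9], &mut out, false);

    assert_eq!(out, "é");
}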

examples/usage/Cargo.toml

Lines changed: 0 additions & 19 deletions
This file was deleted.

llama-cpp-2/Cargo.toml

Lines changed: 7 additions & 0 deletions
@@ -14,6 +14,9 @@ llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69" }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 
+[dev-dependencies]
+encoding_rs = { workspace = true }
+
 [features]
 cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
@@ -32,3 +35,7 @@ workspace = true
 
 [package.metadata.docs.rs]
 features = ["sampler"]
+
+[[example]]
+name = "usage"
+path = "../examples/usage.rs"

llama-cpp-2/src/context/sample/sampler.rs

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 //! like [`crate::context::LlamaContext`] or token history to the sampler.
 //!
 //! # Example
-//! 
+//!
 //! **Llama.cpp default sampler**
 //!
 //! ```rust

llama-cpp-2/src/model.rs

Lines changed: 6 additions & 2 deletions
@@ -1,6 +1,6 @@
 //! A safe wrapper around `llama_model`.
-use std::ffi::CString;
 use std::ffi::CStr;
+use std::ffi::CString;
 use std::num::NonZeroU16;
 use std::os::raw::c_int;
 use std::path::Path;
@@ -550,7 +550,11 @@ impl LlamaModel {
             if res > buff.len() as i32 {
                 return Err(ApplyChatTemplateError::BuffSizeError);
            }
-            Ok::<String, ApplyChatTemplateError>(CStr::from_ptr(buff.as_mut_ptr()).to_string_lossy().to_string())
+            Ok::<String, ApplyChatTemplateError>(
+                CStr::from_ptr(buff.as_mut_ptr())
+                    .to_string_lossy()
+                    .to_string(),
+            )
         }?;
         Ok(formatted_chat)
     }
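
The hunk above only reformats the existing conversion: the C string that was written into buff is copied out with a lossy UTF-8 conversion. A small safe-Rust sketch of that CStr-to-String pattern follows; the function and buffer are hypothetical, and the real code uses the unsafe CStr::from_ptr because it only has a raw buffer pointer.

use std::ffi::CStr;

// Hypothetical stand-in for the buffer handling above: read a NUL-terminated
// byte buffer and copy it into an owned String, replacing any invalid UTF-8
// with U+FFFD via to_string_lossy().
fn buffer_to_string(buff: &[u8]) -> String {
    CStr::from_bytes_until_nul(buff)
        .expect("buffer must contain a NUL terminator")
        .to_string_lossy()
        .to_string()
}

fn main() {
    let raw = b"formatted chat\0 unused trailing capacity";
    assert_eq!(buffer_to_string(raw), "formatted chat");
}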

llama-cpp-2/src/model/params/kv_overrides.rs

Lines changed: 2 additions & 6 deletions
@@ -33,17 +33,13 @@ impl ParamOverrideValue {
                 llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_bool: *value }
             }
             ParamOverrideValue::Float(value) => {
-                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
-                    val_f64: *value,
-                }
+                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_f64: *value }
             }
             ParamOverrideValue::Int(value) => {
                 llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_i64: *value }
             }
             ParamOverrideValue::Str(c_string) => {
-                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
-                    val_str: *c_string,
-                }
+                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_str: *c_string }
             }
         }
     }
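
The reformatted arms all do the same thing: initialize exactly one field of the bindgen-generated C union. A minimal sketch of how that works in Rust, with KvOverride as a hypothetical stand-in for llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1:

// A bindgen-style C union is constructed by naming a single field; reading a
// field back is unsafe because Rust cannot track which variant was written last.
#[allow(dead_code)]
#[repr(C)]
union KvOverride {
    val_bool: bool,
    val_i64: i64,
    val_f64: f64,
}

fn main() {
    let v = KvOverride { val_f64: 0.5 };
    // Safe only because we read the same field we just wrote.
    let f = unsafe { v.val_f64 };
    assert_eq!(f, 0.5);
}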

llama-cpp-2/src/token_type.rs

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenAttrs {
     type Error = LlamaTokenTypeFromIntError;
 
     fn try_from(value: llama_cpp_sys_2::llama_vocab_type) -> Result<Self, Self::Error> {
-        Ok(Self(BitFlags::from_bits(value).map_err(|e| {
+        Ok(Self(BitFlags::from_bits(value as _).map_err(|e| {
             LlamaTokenTypeFromIntError::UnknownValue(e.invalid_bits())
         })?))
     }
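
The only functional change in this file is the `value as _` cast. A minimal sketch of that inferred-cast idiom, with expects_u32 as a hypothetical stand-in for BitFlags::from_bits:

// `as _` leaves the target type of a numeric cast for the compiler to infer
// from the call site.
fn expects_u32(bits: u32) -> u32 {
    bits
}

fn main() {
    let value: i32 = 10;
    // The compiler infers `u32` from expects_u32's parameter, so this is
    // equivalent to writing `value as u32`.
    assert_eq!(expects_u32(value as _), 10);
}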

llama-cpp-sys-2/build.rs

Lines changed: 29 additions & 20 deletions
@@ -91,11 +91,9 @@ fn compile_bindings(
     llama_header_path: &Path,
 ) -> Result<(), Box<dyn std::error::Error + 'static>> {
     println!("Generating bindings..");
-
-    let includes = [
-        llama_header_path.join("ggml").join("include"),
-    ];
-
+
+    let includes = [llama_header_path.join("ggml").join("include")];
+
     let bindings = bindgen::Builder::default()
         .clang_args(includes.map(|path| format!("-I{}", path.to_string_lossy())))
         .header(
@@ -425,9 +423,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati
     // nvcc.flag("-Wno-pedantic");
     // }
 
-    for lib in [
-        "cuda", "cublas", "cudart", "cublasLt"
-    ] {
+    for lib in ["cuda", "cublas", "cudart", "cublasLt"] {
         println!("cargo:rustc-link-lib={}", lib);
     }
     if !nvcc.get_compiler().is_like_msvc() {
@@ -623,31 +619,44 @@ fn gen_vulkan_shaders(out_path: impl AsRef<Path>) -> (impl AsRef<Path>, impl AsR
         .cpp(true)
         .get_compiler();
 
-    assert!(!cxx.is_like_msvc(), "Compiling Vulkan GGML with MSVC is not supported at this time.");
+    assert!(
+        !cxx.is_like_msvc(),
+        "Compiling Vulkan GGML with MSVC is not supported at this time."
+    );
 
     let vulkan_shaders_gen_bin = out_path.as_ref().join("vulkan-shaders-gen");
 
     cxx.to_command()
         .args([
-            vulkan_shaders_src.join("vulkan-shaders-gen.cpp").as_os_str(),
-            "-o".as_ref(), vulkan_shaders_gen_bin.as_os_str()
+            vulkan_shaders_src
+                .join("vulkan-shaders-gen.cpp")
+                .as_os_str(),
+            "-o".as_ref(),
+            vulkan_shaders_gen_bin.as_os_str(),
         ])
-        .output().expect("Could not compile Vulkan shader generator");
+        .output()
+        .expect("Could not compile Vulkan shader generator");
 
     let header = out_path.as_ref().join("ggml-vulkan-shaders.hpp");
     let source = out_path.as_ref().join("ggml-vulkan-shaders.cpp");
 
     Command::new(vulkan_shaders_gen_bin)
         .args([
-            "--glslc".as_ref(), "glslc".as_ref(),
-            "--input-dir".as_ref(), vulkan_shaders_src.as_os_str(),
-            "--output-dir".as_ref(), out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
-            "--target-hpp".as_ref(), header.as_os_str(),
-            "--target-cpp".as_ref(), source.as_os_str(),
-            "--no-clean".as_ref()
+            "--glslc".as_ref(),
+            "glslc".as_ref(),
+            "--input-dir".as_ref(),
+            vulkan_shaders_src.as_os_str(),
+            "--output-dir".as_ref(),
+            out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
+            "--target-hpp".as_ref(),
+            header.as_os_str(),
+            "--target-cpp".as_ref(),
+            source.as_os_str(),
+            "--no-clean".as_ref(),
        ])
-        .output().expect("Could not run Vulkan shader generator");
-
+        .output()
+        .expect("Could not run Vulkan shader generator");
+
     (out_path, source)
 }
 
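
The reformatted gen_vulkan_shaders hunk keeps the same std::process::Command invocation, just one argument per line. Below is a minimal sketch of that argument-building pattern; the paths and binary are hypothetical, and unlike the build script it reports failure instead of aborting.

use std::path::Path;
use std::process::Command;

fn main() {
    let out_dir = Path::new("/tmp/out");
    let gen_bin = out_dir.join("vulkan-shaders-gen");

    let result = Command::new(&gen_bin)
        .args([
            // `.as_ref()` coerces the string literals to &OsStr so the array
            // has a single element type alongside the path arguments.
            "--glslc".as_ref(),
            "glslc".as_ref(),
            "--output-dir".as_ref(),
            out_dir.join("vulkan-shaders.spv").as_os_str(),
        ])
        .output();

    // The real build script calls .expect(..) and fails the build; here we
    // only report whether the hypothetical binary could be spawned.
    match result {
        Ok(output) => println!("generator exited with {}", output.status),
        Err(err) => eprintln!("could not run generator: {err}"),
    }
}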
