From 9a76676dd842c46d98fc02eb3ad1036b17ddbaef Mon Sep 17 00:00:00 2001 From: JakkuSakura Date: Sun, 6 Apr 2025 14:52:25 +0800 Subject: [PATCH 1/5] Refactor update_libchdb.sh for system-wide install and improve README documentation --- .gitignore | 2 +- README.md | 29 ++++++++++-- build.rs | 2 +- update_libchdb.sh | 111 ++++++++++++++++++++++++++++++++++++++++------ wrapper.h | 1 + 5 files changed, 126 insertions(+), 19 deletions(-) create mode 100644 wrapper.h diff --git a/.gitignore b/.gitignore index 58c8d71..f5deb3b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ Cargo.lock *.tar.gz *.so -*.h +chdb.h var udf *.parquet diff --git a/README.md b/README.md index f2e6fd2..a8b9ab5 100644 --- a/README.md +++ b/README.md @@ -3,21 +3,42 @@ [![Rust](https://github.com/chdb-io/chdb-rust/actions/workflows/rust.yml/badge.svg)](https://github.com/chdb-io/chdb-rust/actions/workflows/rust.yml) # chdb-rust + Experimental [chDB](https://github.com/chdb-io/chdb) FFI bindings for Rust -### Status +## Status - Experimental, unstable, subject to changes -- Requires [`libchdb`](https://github.com/chdb-io/chdb) on the system +- Requires [`libchdb`](https://github.com/chdb-io/chdb) on the system. You can install the compatible version from + `update_libchdb.sh` + +## Usage + +### Install libchdb + +You can install it system-wide + +```bash +./update_libchdb.sh --global +``` + +or use it in a local directory + +```bash +./update_libchdb.sh --local +``` + +### Build -#### Build binding ```bash -./update_libchdb.sh RUST_BACKTRACE=full cargo build --verbose + ``` ### Run tests + `cargo test` ### Examples + See `tests` directory. diff --git a/build.rs b/build.rs index fd76fcf..3e748b4 100644 --- a/build.rs +++ b/build.rs @@ -16,7 +16,7 @@ fn main() { let bindings = bindgen::Builder::default() // The input header we would like to generate // bindings for. 
- .header("chdb.h") + .header("wrapper.h") // Tell cargo to invalidate the built crate whenever any of the // included header files changed. .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) diff --git a/update_libchdb.sh b/update_libchdb.sh index cd23fec..fb6e7e3 100755 --- a/update_libchdb.sh +++ b/update_libchdb.sh @@ -1,13 +1,37 @@ - #!/bin/bash + set -e -cd $(dirname "${BASH_SOURCE[0]}") + +# Check for necessary tools +command -v curl >/dev/null 2>&1 || { echo >&2 "curl is required but it's not installed. Aborting."; exit 1; } +command -v tar >/dev/null 2>&1 || { echo >&2 "tar is required but it's not installed. Aborting."; exit 1; } + +# Function to download and extract the file +download_and_extract() { + local url=$1 + local file="libchdb.tar.gz" + + echo "Attempting to download $PLATFORM from $url" + + # Download the file with a retry logic + if curl -L -o "$file" "$url"; then + echo "Download successful." + + # Optional: Verify download integrity here, if checksums are provided + + # Untar the file + if tar -xzf "$file"; then + echo "Extraction successful." + return 0 + fi + fi + return 1 +} # Get the newest release version -# LATEST_RELEASE=$(curl --silent "https://api.github.com/repos/chdb-io/chdb/releases/latest" | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') LATEST_RELEASE=v2.1.1 -# Download the correct version based on the platform +# Select the correct package based on OS and architecture case "$(uname -s)" in Linux) if [[ $(uname -m) == "aarch64" ]]; then @@ -29,18 +53,79 @@ case "$(uname -s)" in ;; esac +# Main download URL DOWNLOAD_URL="https://github.com/chdb-io/chdb/releases/download/$LATEST_RELEASE/$PLATFORM" +FALLBACK_URL="https://github.com/chdb-io/chdb/releases/latest/download/$PLATFORM" + +# Try the main download URL first +if ! download_and_extract "$DOWNLOAD_URL"; then + echo "Retrying with fallback URL..." + if ! download_and_extract "$FALLBACK_URL"; then + echo "Both primary and fallback downloads failed. Aborting." 
+ exit 1 + fi +fi + +# check if --local flag is passed +if [[ "$1" == "--local" ]]; then + # Set execute permission for libchdb.so + chmod +x libchdb.so + + # Clean up + rm -f libchdb.tar.gz + exit 0 +elif [[ "$1" == "--global" ]]; then + # If current uid is not 0, check if sudo is available and request the user to input the password + if [[ $EUID -ne 0 ]]; then + command -v sudo >/dev/null 2>&1 || { echo >&2 "This script requires sudo privileges but sudo is not installed. Aborting."; exit 1; } + echo "Installation requires administrative access. You will be prompted for your password." + fi + + # Define color messages if terminal supports them + if [[ -t 1 ]]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + NC='\033[0m' # No Color + REDECHO() { echo -e "${RED}$@${NC}"; } + GREENECHO() { echo -e "${GREEN}$@${NC}"; } + ENDECHO() { echo -ne "${NC}"; } + else + REDECHO() { echo "$@"; } + GREENECHO() { echo "$@"; } + ENDECHO() { :; } + fi + + # Use sudo if not running as root + SUDO='' + if [[ $EUID -ne 0 ]]; then + SUDO='sudo' + GREENECHO "\nYou will be asked for your sudo password to install:" + echo " libchdb.so to /usr/local/lib/" + echo " chdb.h to /usr/local/include/" + fi + + # Make sure the library and header directory exists + ${SUDO} mkdir -p /usr/local/lib /usr/local/include || true -echo "Downloading $PLATFORM from $DOWNLOAD_URL" + # Install the library and header file + ${SUDO} /bin/cp libchdb.so /usr/local/lib/ + ${SUDO} /bin/cp chdb.h /usr/local/include/ -# Download the file -curl -L -o libchdb.tar.gz $DOWNLOAD_URL + # Set execute permission for libchdb.so + ${SUDO} chmod +x /usr/local/lib/libchdb.so -# Untar the file -tar -xzf libchdb.tar.gz + # Update library cache (Linux specific) + if [[ "$(uname -s)" == "Linux" ]]; then + ${SUDO} ldconfig + fi -# Set execute permission for libchdb.so -chmod +x libchdb.so + # Clean up + rm -f libchdb.tar.gz libchdb.so chdb.h -# Clean up -rm -f libchdb.tar.gz + GREENECHO "Installation completed successfully." 
; ENDECHO + GREENECHO "If any error occurred, please report it to:" ; ENDECHO + GREENECHO " https://github.com/chdb-io/chdb/issues/new/choose" ; ENDECHO +else + echo "Invalid option. Use --local to install locally or --global to install globally." + exit 1 +fi diff --git a/wrapper.h b/wrapper.h new file mode 100644 index 0000000..205a063 --- /dev/null +++ b/wrapper.h @@ -0,0 +1 @@ +#include "chdb.h" From f8c9408acb452e32ee782e1d08f70c5f1dfbf625 Mon Sep 17 00:00:00 2001 From: JakkuSakura Date: Sun, 6 Apr 2025 18:22:23 +0800 Subject: [PATCH 2/5] Update update_libchdb.sh to change latest release version to v3.1.2 and streamline installation options --- update_libchdb.sh | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/update_libchdb.sh b/update_libchdb.sh index fb6e7e3..695c7cc 100755 --- a/update_libchdb.sh +++ b/update_libchdb.sh @@ -29,7 +29,7 @@ download_and_extract() { } # Get the newest release version -LATEST_RELEASE=v2.1.1 +LATEST_RELEASE=v3.1.2 # Select the correct package based on OS and architecture case "$(uname -s)" in @@ -66,15 +66,9 @@ if ! download_and_extract "$DOWNLOAD_URL"; then fi fi -# check if --local flag is passed -if [[ "$1" == "--local" ]]; then - # Set execute permission for libchdb.so - chmod +x libchdb.so +chmod +x libchdb.so - # Clean up - rm -f libchdb.tar.gz - exit 0 -elif [[ "$1" == "--global" ]]; then +if [[ "$1" == "--global" ]]; then # If current uid is not 0, check if sudo is available and request the user to input the password if [[ $EUID -ne 0 ]]; then command -v sudo >/dev/null 2>&1 || { echo >&2 "This script requires sudo privileges but sudo is not installed. Aborting."; exit 1; } @@ -119,13 +113,7 @@ elif [[ "$1" == "--global" ]]; then ${SUDO} ldconfig fi - # Clean up - rm -f libchdb.tar.gz libchdb.so chdb.h - GREENECHO "Installation completed successfully." 
; ENDECHO GREENECHO "If any error occurred, please report it to:" ; ENDECHO GREENECHO " https://github.com/chdb-io/chdb/issues/new/choose" ; ENDECHO -else - echo "Invalid option. Use --local to install locally or --global to install globally." - exit 1 fi From 6d08e655d59016d1ba01a98ac436d8b8771ebe57 Mon Sep 17 00:00:00 2001 From: auxten Date: Mon, 7 Apr 2025 05:40:40 +0000 Subject: [PATCH 3/5] Fix libchdb version to v2.1.1 until libchdb v3 based chdb-rust refactor --- .github/workflows/rust.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ca00651..f3e1160 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -16,7 +16,9 @@ jobs: - uses: actions/checkout@v3 - name: Fetch library run: | - wget https://github.com/chdb-io/chdb/releases/latest/download/linux-x86_64-libchdb.tar.gz + #wget https://github.com/chdb-io/chdb/releases/latest/download/linux-x86_64-libchdb.tar.gz + #Fix libchdb version to v2.1.1 until libchdb v3 based chdb-rust refactor + wget https://github.com/chdb-io/chdb/releases/download/v2.1.1/linux-x86_64-libchdb.tar.gz tar -xzf linux-x86_64-libchdb.tar.gz sudo mv libchdb.so /usr/lib/libchdb.so sudo ldconfig From e36bb8246bbefde82974f0e458966b1409195b82 Mon Sep 17 00:00:00 2001 From: JakkuSakura Date: Sun, 6 Apr 2025 15:36:04 +0800 Subject: [PATCH 4/5] Enhance error handling and simplify Result type usage in the library --- Cargo.toml | 3 +++ src/error.rs | 7 +++++- src/lib.rs | 17 +++++++++------ src/query_result.rs | 43 ++++++++++++++++++++++++------------- src/session.rs | 6 +----- tests/examples.rs | 52 +++++++++++++++++++++------------------------ 6 files changed, 72 insertions(+), 56 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bde8879..5ea76bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,3 +9,6 @@ thiserror = "1" [build-dependencies] bindgen = "0.70.1" + +[dev-dependencies] +tempdir = "0.3.7" \ No newline at end of file diff 
--git a/src/error.rs b/src/error.rs index cf9023f..e1276a2 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,9 +1,12 @@ use std::ffi::NulError; +use std::string::FromUtf8Error; #[derive(Debug, thiserror::Error)] pub enum Error { #[error("An unknown error has occurred")] Unknown, + #[error("No result")] + NoResult, #[error("Invalid data: {0}")] InvalidData(String), #[error("Invalid path")] @@ -15,7 +18,9 @@ pub enum Error { #[error("Insufficient dir permissions")] InsufficientPermissions, #[error("Non UTF-8 sequence: {0}")] - NonUtf8Sequence(String), + NonUtf8Sequence(FromUtf8Error), #[error("{0}")] QueryError(String), } + +pub type Result = std::result::Result; diff --git a/src/lib.rs b/src/lib.rs index 2c3e606..5ff5bf6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,9 +17,10 @@ use std::ffi::{c_char, CString}; use crate::arg::Arg; use crate::error::Error; +use crate::error::Result; use crate::query_result::QueryResult; -pub fn execute(query: &str, query_args: Option<&[Arg]>) -> Result, Error> { +pub fn execute(query: &str, query_args: Option<&[Arg]>) -> Result { let mut argv = Vec::with_capacity(query_args.as_ref().map_or(0, |v| v.len()) + 2); argv.push(arg_clickhouse()?.into_raw()); @@ -33,26 +34,28 @@ pub fn execute(query: &str, query_args: Option<&[Arg]>) -> Result) -> Result, Error> { +fn call_chdb(mut argv: Vec<*mut c_char>) -> Result { let argc = argv.len() as i32; let argv = argv.as_mut_ptr(); let result_ptr = unsafe { bindings::query_stable_v2(argc, argv) }; if result_ptr.is_null() { - return Ok(None); + return Err(Error::NoResult); } + let result = QueryResult::new(result_ptr); + let result = result.check_error()?; - Ok(Some(QueryResult(result_ptr).check_error()?)) + Ok(result) } -fn arg_clickhouse() -> Result { +fn arg_clickhouse() -> Result { Ok(CString::new("clickhouse")?) } -fn arg_data_path(value: &str) -> Result { +fn arg_data_path(value: &str) -> Result { Ok(CString::new(format!("--path={}", value))?) 
} -fn arg_query(value: &str) -> Result { +fn arg_query(value: &str) -> Result { Ok(CString::new(format!("--query={}", value))?) } diff --git a/src/query_result.rs b/src/query_result.rs index 0d1f331..64389e8 100644 --- a/src/query_result.rs +++ b/src/query_result.rs @@ -5,17 +5,23 @@ use std::time::Duration; use crate::bindings; use crate::error::Error; - +use crate::error::Result; #[derive(Clone)] -pub struct QueryResult(pub(crate) *mut bindings::local_result_v2); +pub struct QueryResult { + inner: *mut bindings::local_result_v2, +} impl QueryResult { - pub fn data_utf8(&self) -> Result { - String::from_utf8(self.data_ref().to_vec()) - .map_err(|e| Error::NonUtf8Sequence(e.to_string())) + pub(crate) fn new(inner: *mut bindings::local_result_v2) -> Self { + Self { inner } } + pub fn data_utf8(&self) -> Result { + let buf = self.data_ref(); - pub fn data_utf8_lossy<'a>(&'a self) -> Cow<'a, str> { + String::from_utf8(buf.to_vec()).map_err(Error::NonUtf8Sequence) + } + + pub fn data_utf8_lossy(&self) -> Cow { String::from_utf8_lossy(self.data_ref()) } @@ -24,30 +30,37 @@ impl QueryResult { } pub fn data_ref(&self) -> &[u8] { - let buf = unsafe { (*self.0).buf }; - let len = unsafe { (*self.0).len }; + let inner = self.inner; + let buf = unsafe { (*inner).buf }; + let len = unsafe { (*inner).len }; let bytes: &[u8] = unsafe { slice::from_raw_parts(buf as *const u8, len) }; bytes } pub fn rows_read(&self) -> u64 { - (unsafe { *self.0 }).rows_read + let inner = self.inner; + unsafe { *inner }.rows_read } pub fn bytes_read(&self) -> u64 { - unsafe { (*self.0).bytes_read } + let inner = self.inner; + unsafe { *inner }.bytes_read } pub fn elapsed(&self) -> Duration { - let elapsed = unsafe { (*self.0).elapsed }; + let elapsed = unsafe { (*self.inner).elapsed }; Duration::from_secs_f64(elapsed) } - pub(crate) fn check_error(self) -> Result { - let err_ptr = unsafe { (*self.0).error_message }; + pub(crate) fn check_error(self) -> Result { + self.check_error_ref()?; + Ok(self) 
+ } + pub(crate) fn check_error_ref(&self) -> Result<()> { + let err_ptr = unsafe { (*self.inner).error_message }; if err_ptr.is_null() { - return Ok(self); + return Ok(()); } Err(Error::QueryError(unsafe { @@ -58,6 +71,6 @@ impl QueryResult { impl Drop for QueryResult { fn drop(&mut self) { - unsafe { bindings::free_result_v2(self.0) }; + unsafe { bindings::free_result_v2(self.inner) }; } } diff --git a/src/session.rs b/src/session.rs index 0c1309c..591ea1d 100644 --- a/src/session.rs +++ b/src/session.rs @@ -82,11 +82,7 @@ impl<'a> Default for SessionBuilder<'a> { } impl Session { - pub fn execute( - &self, - query: &str, - query_args: Option<&[Arg]>, - ) -> Result, Error> { + pub fn execute(&self, query: &str, query_args: Option<&[Arg]>) -> Result { let mut argv = Vec::with_capacity( self.default_args.len() + query_args.as_ref().map_or(0, |v| v.len()) + 1, ); diff --git a/tests/examples.rs b/tests/examples.rs index 57b7252..a59f614 100644 --- a/tests/examples.rs +++ b/tests/examples.rs @@ -1,4 +1,5 @@ use chdb_rust::arg::Arg; +use chdb_rust::error::Result; use chdb_rust::execute; use chdb_rust::format::InputFormat; use chdb_rust::format::OutputFormat; @@ -6,63 +7,59 @@ use chdb_rust::log_level::LogLevel; use chdb_rust::session::SessionBuilder; #[test] -fn stateful() { +fn test_stateful() -> Result<()> { // // Create session. // - + let tmp = tempdir::TempDir::new("chdb-rust")?; let session = SessionBuilder::new() - .with_data_path("/tmp/chdb") + .with_data_path(tmp.path()) .with_arg(Arg::LogLevel(LogLevel::Debug)) .with_arg(Arg::Custom("priority".into(), Some("1".into()))) .with_auto_cleanup(true) - .build() - .unwrap(); + .build()?; // // Create database. // - session - .execute("CREATE DATABASE demo; USE demo", Some(&[Arg::MultiQuery])) - .unwrap(); + session.execute("CREATE DATABASE demo; USE demo", Some(&[Arg::MultiQuery]))?; // // Create table. 
// - session - .execute( - "CREATE TABLE logs (id UInt64, msg String) ENGINE = MergeTree ORDER BY id", - None, - ) - .unwrap(); + session.execute( + "CREATE TABLE logs (id UInt64, msg String) ENGINE = MergeTree() ORDER BY id", + None, + )?; // // Insert into table. // - session - .execute("INSERT INTO logs (id, msg) VALUES (1, 'test')", None) - .unwrap(); + session.execute("INSERT INTO logs (id, msg) VALUES (1, 'test')", None)?; // // Select from table. // + let len = session.execute( + "SELECT COUNT(*) FROM logs", + Some(&[Arg::OutputFormat(OutputFormat::JSONEachRow)]), + )?; - let result = session - .execute( - "SELECT * FROM logs", - Some(&[Arg::OutputFormat(OutputFormat::JSONEachRow)]), - ) - .unwrap() - .unwrap(); + assert_eq!(len.data_utf8_lossy(), "{\"COUNT()\":1}\n"); + let result = session.execute( + "SELECT * FROM logs", + Some(&[Arg::OutputFormat(OutputFormat::JSONEachRow)]), + )?; assert_eq!(result.data_utf8_lossy(), "{\"id\":1,\"msg\":\"test\"}\n"); + Ok(()) } #[test] -fn stateless() { +fn test_stateless() -> Result<()> { let query = format!( "SELECT * FROM file('tests/logs.csv', {})", InputFormat::CSV.as_str() @@ -71,9 +68,8 @@ fn stateless() { let result = execute( &query, Some(&[Arg::OutputFormat(OutputFormat::JSONEachRow)]), - ) - .unwrap() - .unwrap(); + )?; assert_eq!(result.data_utf8_lossy(), "{\"id\":1,\"msg\":\"test\"}\n"); + Ok(()) } From 85db3a80614728c06ab4d7dd9755fedcd7cf8659 Mon Sep 17 00:00:00 2001 From: auxten Date: Mon, 7 Apr 2025 05:51:20 +0000 Subject: [PATCH 5/5] Fix libchdb to v2.1.1 until v3 refactor --- update_libchdb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update_libchdb.sh b/update_libchdb.sh index 695c7cc..db4717b 100755 --- a/update_libchdb.sh +++ b/update_libchdb.sh @@ -29,7 +29,7 @@ download_and_extract() { } # Get the newest release version -LATEST_RELEASE=v3.1.2 +LATEST_RELEASE=v2.1.1 # Select the correct package based on OS and architecture case "$(uname -s)" in