def main():
    """Run a minimal DuckDB round trip: create a table, insert rows, print them.

    Opens ``example.duckdb`` (a relative path, so it resolves against the
    current working directory), makes sure a ``documents`` table exists,
    inserts three sample rows and prints every row of the table.

    The connection is closed in a ``finally`` clause so the database file is
    released even when one of the statements raises.
    """
    # Imported inside the function so this block is self-contained.
    from llmware.duckdb_integration import DuckDBIntegration

    # Initialize DuckDB
    db = DuckDBIntegration(db_path='example.duckdb')
    try:
        # Create a table for documents
        db.create_table('documents', 'id INTEGER, content TEXT')

        # Insert example data
        documents = [
            (1, 'DuckDB is an in-process SQL OLAP database management system.'),
            (2, 'It is designed for analytical workloads and supports vectorized execution.'),
            (3, 'DuckDB recently added support for similarity search using vectors.'),
        ]
        db.insert_data('documents', documents)

        # Query the table
        results = db.query('SELECT * FROM documents;')
        print('Documents in the database:')
        for row in results:
            print(row)
    finally:
        # Close the connection on both the success and the failure path.
        db.close()
class DuckDBIntegration:
    """Thin convenience wrapper around a single DuckDB connection.

    The instance owns one connection (``self.connection``) and offers helpers
    to create tables, bulk-insert rows and run queries.  It also works as a
    context manager, which closes the connection when the block exits::

        with DuckDBIntegration() as db:
            db.create_table('t', 'id INTEGER')

    NOTE: the ``table_name`` and ``schema`` arguments are interpolated into
    the SQL text verbatim, so they must come from trusted code and never from
    end-user input.  Row *values* are always bound through ``?`` placeholders.
    """

    def __init__(self, db_path: str = ':memory:'):
        """Initialize the DuckDB connection.

        Args:
            db_path (str): Path to the DuckDB database file. Defaults to in-memory.
        """
        # Imported lazily: the dependency is only needed once a connection is
        # actually opened.
        import duckdb

        self.connection = duckdb.connect(database=db_path, read_only=False)

    def __enter__(self):
        """Return the wrapper itself so it can be bound with ``as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; exceptions from the block are not suppressed."""
        self.close()
        return False

    def create_table(self, table_name: str, schema: str) -> None:
        """Create a table in the DuckDB database if it does not exist yet.

        Args:
            table_name (str): Name of the table to create (trusted input only).
            schema (str): Column definitions placed between the parentheses
                of the ``CREATE TABLE`` statement (trusted input only).
        """
        self.connection.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({schema});")

    def insert_data(self, table_name: str, data: list) -> None:
        """Insert rows into a DuckDB table.

        Args:
            table_name (str): Name of the table to insert data into
                (trusted input only).
            data (list): List of tuples representing rows to insert. The
                number of placeholders is taken from the first row. An empty
                list is a no-op.
        """
        if not data:
            # Nothing to insert; also avoids an IndexError on data[0] below.
            return
        placeholders = ', '.join(['?'] * len(data[0]))
        self.connection.executemany(f"INSERT INTO {table_name} VALUES ({placeholders});", data)

    def query(self, sql: str, params=None) -> list:
        """Execute a query on the DuckDB database.

        Args:
            sql (str): SQL query to execute.
            params: Optional sequence of values bound to the ``?``
                placeholders in ``sql``. Prefer this over formatting values
                into the SQL string.

        Returns:
            list: Query results, one tuple per row.
        """
        if params is None:
            cursor = self.connection.execute(sql)
        else:
            cursor = self.connection.execute(sql, params)
        return cursor.fetchall()

    def close(self) -> None:
        """Close the DuckDB connection."""
        self.connection.close()