diff --git a/core/go.mod b/core/go.mod
index 7207ace..7b6fa86 100644
--- a/core/go.mod
+++ b/core/go.mod
@@ -21,6 +21,7 @@ require (
gorm.io/driver/postgres v1.5.11
gorm.io/driver/sqlite v1.5.7
gorm.io/gorm v1.25.12
+ github.com/alifiroozi80/duckdb v1.1.1
)
require (
diff --git a/core/graph/schema.graphqls b/core/graph/schema.graphqls
index 6e8b966..cb609af 100644
--- a/core/graph/schema.graphqls
+++ b/core/graph/schema.graphqls
@@ -7,6 +7,7 @@ enum DatabaseType {
ElasticSearch,
MariaDB,
ClickHouse,
+ DuckDB,
}
type Column {
diff --git a/core/src/engine/engine.go b/core/src/engine/engine.go
index af1dcd2..10133c2 100644
--- a/core/src/engine/engine.go
+++ b/core/src/engine/engine.go
@@ -23,6 +23,7 @@ const (
DatabaseType_MySQL = "MySQL"
DatabaseType_MariaDB = "MariaDB"
DatabaseType_Sqlite3 = "Sqlite3"
+ DatabaseType_DuckDB = "DuckDB"
DatabaseType_MongoDB = "MongoDB"
DatabaseType_Redis = "Redis"
DatabaseType_ElasticSearch = "ElasticSearch"
diff --git a/core/src/plugins/duckdb/add.go b/core/src/plugins/duckdb/add.go
new file mode 100644
index 0000000..56ef28e
--- /dev/null
+++ b/core/src/plugins/duckdb/add.go
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2025 Clidey, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package duckdb
+
+import (
+ "fmt"
+
+ "github.com/clidey/whodb/core/src/engine"
+ "github.com/clidey/whodb/core/src/plugins"
+ "gorm.io/gorm"
+)
+
+func (p *DuckDBPlugin) AddStorageUnitRow(config *engine.PluginConfig, schema string, storageUnit string, values []engine.Record) error {
+ return plugins.WithConnection(config, p.DB, func(db *gorm.DB) error {
+ tableName := p.FormTableName(schema, storageUnit)
+
+ data, err := p.ConvertRecordValuesToMap(values)
+ if err != nil {
+ return err
+ }
+
+ return db.Table(tableName).Create(data).Error
+ })
+}
+
+func (p *DuckDBPlugin) AddStorageUnit(config *engine.PluginConfig, schema string, storageUnit string, columns []engine.Record) error {
+ return plugins.WithConnection(config, p.DB, func(db *gorm.DB) error {
+ query := p.GetCreateTableQuery(schema, storageUnit, columns)
+ return db.Exec(query).Error
+ })
+}
\ No newline at end of file
diff --git a/core/src/plugins/duckdb/db.go b/core/src/plugins/duckdb/db.go
new file mode 100644
index 0000000..05a61e5
--- /dev/null
+++ b/core/src/plugins/duckdb/db.go
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2025 Clidey, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package duckdb
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/alifiroozi80/duckdb"
+ "github.com/clidey/whodb/core/src/engine"
+ "github.com/clidey/whodb/core/src/env"
+ "gorm.io/gorm"
+)
+
+func getDefaultDirectory() string {
+ directory := "/db/"
+ if env.IsDevelopment {
+ directory = "tmp/"
+ }
+ return directory
+}
+
+var errDoesNotExist = errors.New("unauthorized or the database doesn't exist")
+
+func (p *DuckDBPlugin) DB(config *engine.PluginConfig) (*gorm.DB, error) {
+ connectionInput, err := p.ParseConnectionConfig(config)
+ if err != nil {
+ return nil, err
+ }
+
+ database := connectionInput.Database
+ fileNameDatabase := filepath.Join(getDefaultDirectory(), database)
+
+ // Security check: ensure the file path is within the allowed directory
+ if !strings.HasPrefix(fileNameDatabase, getDefaultDirectory()) {
+ return nil, errDoesNotExist
+ }
+
+ // Check if file exists
+ if _, err := os.Stat(fileNameDatabase); errors.Is(err, os.ErrNotExist) {
+ return nil, errDoesNotExist
+ }
+
+ // Validate file extension (accept .duckdb, .ddb, .db as requested)
+ ext := strings.ToLower(filepath.Ext(fileNameDatabase))
+ if ext != ".duckdb" && ext != ".ddb" && ext != ".db" {
+ return nil, fmt.Errorf("unsupported file extension: %s. Only .duckdb, .ddb, and .db files are supported", ext)
+ }
+
+ // Create connection string for DuckDB
+ // DuckDB supports various connection options for performance and behavior tuning
+ dsn := fileNameDatabase
+
+ // Add DuckDB-specific connection options
+ params := make([]string, 0)
+
+ // Access mode: read_only or read_write
+ if connectionInput.DuckDBAccessMode != "" && connectionInput.DuckDBAccessMode != "read_write" {
+ params = append(params, fmt.Sprintf("access_mode=%s", connectionInput.DuckDBAccessMode))
+ }
+
+ // Thread configuration for parallel execution
+ if connectionInput.DuckDBThreads != "" {
+ params = append(params, fmt.Sprintf("threads=%s", connectionInput.DuckDBThreads))
+ }
+
+ // Memory limit configuration (e.g., "1GB", "512MB")
+ if connectionInput.DuckDBMaxMemory != "" {
+ params = append(params, fmt.Sprintf("max_memory=%s", connectionInput.DuckDBMaxMemory))
+ }
+
+ // Temporary directory for intermediate results
+ if connectionInput.DuckDBTempDirectory != "" {
+ params = append(params, fmt.Sprintf("temp_directory=%s", connectionInput.DuckDBTempDirectory))
+ }
+
+ // Add any extra connection options if needed
+ if connectionInput.ExtraOptions != nil && len(connectionInput.ExtraOptions) > 0 {
+ for key, value := range connectionInput.ExtraOptions {
+ params = append(params, fmt.Sprintf("%s=%s", key, value))
+ }
+ }
+
+ // Build final DSN
+ if len(params) > 0 {
+ dsn += "?" + strings.Join(params, "&")
+ }
+
+ db, err := gorm.Open(duckdb.Open(dsn), &gorm.Config{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to connect to DuckDB: %w", err)
+ }
+
+ // After successful connection, enable CSV/Parquet reading from the same directory
+ if err := p.setupFileAccess(db, fileNameDatabase); err != nil {
+ return nil, fmt.Errorf("failed to setup file access: %w", err)
+ }
+
+ return db, nil
+}
+
+// setupFileAccess configures DuckDB to allow reading CSV and Parquet files from the same directory as the database
+func (p *DuckDBPlugin) setupFileAccess(db *gorm.DB, dbFilePath string) error {
+ // Get the directory containing the database file
+ dbDir := filepath.Dir(dbFilePath)
+
+ // Enable the httpfs extension for reading files (optional, for http/https URLs)
+ // This is disabled by default for security
+
+ // Create views or helper functions for reading CSV/Parquet files from the same directory
+ // This is done by creating a function that validates file paths
+
+ // For now, we'll just validate that the directory is accessible
+ // The actual CSV/Parquet reading will be done through raw SQL queries
+ // that we'll validate to ensure they only access files in the same directory
+
+ _, err := os.Stat(dbDir)
+ if err != nil {
+ return fmt.Errorf("database directory not accessible: %w", err)
+ }
+
+ return nil
+}
+
+// ValidateFileAccess ensures that a file path is within the allowed directory (same as database)
+func (p *DuckDBPlugin) ValidateFileAccess(dbFilePath, requestedFilePath string) error {
+ dbDir := filepath.Dir(dbFilePath)
+
+ // Clean the requested file path
+ cleanPath := filepath.Clean(requestedFilePath)
+
+ // If it's not an absolute path, make it relative to the database directory
+ if !filepath.IsAbs(cleanPath) {
+ cleanPath = filepath.Join(dbDir, cleanPath)
+ }
+
+ // Get absolute path of the database directory for comparison
+ dbDirAbs, err := filepath.Abs(dbDir)
+ if err != nil {
+ return fmt.Errorf("cannot resolve database directory: %w", err)
+ }
+
+ // SECURITY FIX: Resolve symlinks BEFORE checking directory containment
+ // This prevents symlink-based directory traversal attacks
+ resolvedPath, err := filepath.EvalSymlinks(cleanPath)
+ if err != nil {
+ // If symlink resolution fails, the file might not exist or be inaccessible
+ return fmt.Errorf("cannot resolve file path (file may not exist or be inaccessible): %w", err)
+ }
+
+ // Ensure the resolved path is within the database directory
+ if !strings.HasPrefix(resolvedPath, dbDirAbs+string(filepath.Separator)) && resolvedPath != dbDirAbs {
+ return fmt.Errorf("file access denied: file must be in the same directory as the database")
+ }
+
+ // Check if the file exists (should exist since EvalSymlinks succeeded, but double-check)
+ if _, err := os.Stat(resolvedPath); os.IsNotExist(err) {
+ return fmt.Errorf("file does not exist: %s", resolvedPath)
+ }
+
+ // Check if it's a CSV or Parquet file
+ ext := strings.ToLower(filepath.Ext(resolvedPath))
+ if ext != ".csv" && ext != ".parquet" {
+ return fmt.Errorf("unsupported file type: %s. Only .csv and .parquet files are supported", ext)
+ }
+
+ return nil
+}
\ No newline at end of file
diff --git a/core/src/plugins/duckdb/duckdb.go b/core/src/plugins/duckdb/duckdb.go
new file mode 100644
index 0000000..6d81bdb
--- /dev/null
+++ b/core/src/plugins/duckdb/duckdb.go
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2025 Clidey, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package duckdb
+
+import (
+ "database/sql"
+ "errors"
+ "fmt"
+ "log"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/clidey/whodb/core/src/engine"
+ "github.com/clidey/whodb/core/src/plugins"
+ gorm_plugin "github.com/clidey/whodb/core/src/plugins/gorm"
+ mapset "github.com/deckarep/golang-set/v2"
+ "gorm.io/gorm"
+)
+
+var (
+ supportedColumnDataTypes = mapset.NewSet(
+ "BOOLEAN", "TINYINT", "SMALLINT", "INTEGER", "BIGINT", "UTINYINT", "USMALLINT", "UINTEGER", "UBIGINT", "HUGEINT",
+ "REAL", "DOUBLE", "DECIMAL", "NUMERIC",
+ "VARCHAR", "CHAR", "TEXT", "STRING", "BPCHAR",
+ "BYTEA", "BLOB", "VARBINARY", "BINARY",
+ "DATE", "TIME", "TIMESTAMP", "TIMESTAMPTZ", "INTERVAL",
+ "UUID", "JSON",
+ "ARRAY", "LIST", "STRUCT", "MAP", "UNION",
+ "BIT", "BITSTRING",
+ )
+)
+
+type DuckDBPlugin struct {
+ gorm_plugin.GormPlugin
+}
+
+func (p *DuckDBPlugin) GetSupportedColumnDataTypes() mapset.Set[string] {
+ return supportedColumnDataTypes
+}
+
+func (p *DuckDBPlugin) GetAllSchemasQuery() string {
+ return ""
+}
+
+func (p *DuckDBPlugin) FormTableName(schema string, storageUnit string) string {
+ return storageUnit
+}
+
+func (p *DuckDBPlugin) GetDatabases(config *engine.PluginConfig) ([]string, error) {
+ directory := getDefaultDirectory()
+ entries, err := os.ReadDir(directory)
+ if err != nil {
+ return nil, err
+ }
+
+ databases := []string{}
+ for _, e := range entries {
+ if !e.IsDir() {
+ fileName := e.Name()
+ // Accept .duckdb, .ddb, and .db files as requested
+ if strings.HasSuffix(fileName, ".duckdb") || strings.HasSuffix(fileName, ".ddb") || strings.HasSuffix(fileName, ".db") {
+ databases = append(databases, fileName)
+ }
+ }
+ }
+
+ return databases, nil
+}
+
+func (p *DuckDBPlugin) GetAllSchemas(config *engine.PluginConfig) ([]string, error) {
+ return nil, errors.ErrUnsupported
+}
+
+func (p *DuckDBPlugin) GetTableInfoQuery() string {
+ return `
+ SELECT
+ table_name,
+ table_type
+ FROM
+ information_schema.tables
+ WHERE
+ table_schema = 'main'
+ AND table_type IN ('BASE TABLE', 'LOCAL TEMPORARY')
+ `
+}
+
+func (p *DuckDBPlugin) GetTableNameAndAttributes(rows *sql.Rows, db *gorm.DB) (string, []engine.Record) {
+ var tableName, tableType string
+ if err := rows.Scan(&tableName, &tableType); err != nil {
+ log.Printf("Error scanning table info: %v", err)
+ return "", nil
+ }
+
+ var rowCount int64
+ rowCountRow := db.Raw(fmt.Sprintf("SELECT COUNT(*) FROM %s", tableName)).Row()
+ err := rowCountRow.Scan(&rowCount)
+ if err != nil {
+ log.Printf("Error getting row count for table %s: %v", tableName, err)
+ return "", nil
+ }
+
+ attributes := []engine.Record{
+ {Key: "Type", Value: tableType},
+ {Key: "Count", Value: fmt.Sprintf("%d", rowCount)},
+ }
+
+ return tableName, attributes
+}
+
+func (p *DuckDBPlugin) GetSchemaTableQuery() string {
+ return `
+ SELECT
+ table_name AS TABLE_NAME,
+ column_name AS COLUMN_NAME,
+ data_type AS DATA_TYPE
+ FROM
+ information_schema.columns
+ WHERE
+ table_schema = 'main'
+ ORDER BY
+ table_name, ordinal_position
+ `
+}
+
+func (p *DuckDBPlugin) executeRawSQL(config *engine.PluginConfig, query string, params ...interface{}) (*engine.GetRowsResult, error) {
+ return plugins.WithConnection(config, p.DB, func(db *gorm.DB) (*engine.GetRowsResult, error) {
+ rows, err := db.Raw(query, params...).Rows()
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ return p.ConvertRawToRows(rows)
+ })
+}
+
+func (p *DuckDBPlugin) RawExecute(config *engine.PluginConfig, query string) (*engine.GetRowsResult, error) {
+ return p.executeRawSQL(config, query)
+}
+
+func NewDuckDBPlugin() *engine.Plugin {
+ plugin := &DuckDBPlugin{}
+ plugin.Type = engine.DatabaseType_DuckDB
+ plugin.PluginFunctions = plugin
+ plugin.GormPluginFunctions = plugin
+ return &plugin.Plugin
+}
\ No newline at end of file
diff --git a/core/src/plugins/duckdb/graph.go b/core/src/plugins/duckdb/graph.go
new file mode 100644
index 0000000..77d5b9b
--- /dev/null
+++ b/core/src/plugins/duckdb/graph.go
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2025 Clidey, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package duckdb
+
+import (
+ "database/sql"
+ "fmt"
+ "log"
+ "strings"
+
+ "github.com/clidey/whodb/core/graph/model"
+ "github.com/clidey/whodb/core/src/engine"
+ "github.com/clidey/whodb/core/src/plugins"
+ "gorm.io/gorm"
+)
+
+func (p *DuckDBPlugin) GetGraph(config *engine.PluginConfig, schema string) (*model.Graph, error) {
+ return plugins.WithConnection(config, p.DB, func(db *gorm.DB) (*model.Graph, error) {
+ tables, err := p.getTablesWithRelationships(db, schema)
+ if err != nil {
+ return nil, err
+ }
+
+ nodes := make([]*model.Node, 0, len(tables))
+ edges := make([]*model.Edge, 0)
+
+ for _, table := range tables {
+ node := &model.Node{
+ ID: table.Name,
+ Label: table.Name,
+ Type: "table",
+ Metadata: map[string]interface{}{
+ "schema": schema,
+ "type": "table",
+ },
+ }
+ nodes = append(nodes, node)
+ }
+
+ // DuckDB doesn't have built-in foreign key constraints like traditional databases
+ // but we can analyze column names and types to suggest relationships
+ edges = p.inferRelationships(tables)
+
+ return &model.Graph{
+ Nodes: nodes,
+ Edges: edges,
+ }, nil
+ })
+}
+
+type TableInfo struct {
+ Name string
+ Columns []ColumnInfo
+}
+
+type ColumnInfo struct {
+ Name string
+ DataType string
+}
+
+func (p *DuckDBPlugin) getTablesWithRelationships(db *gorm.DB, schema string) ([]TableInfo, error) {
+ var tables []TableInfo
+
+ // Get all tables
+ tableRows, err := db.Raw(`
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema = 'main'
+ AND table_type = 'BASE TABLE'
+ `).Rows()
+ if err != nil {
+ return nil, err
+ }
+ defer tableRows.Close()
+
+ var tableNames []string
+ for tableRows.Next() {
+ var tableName string
+ if err := tableRows.Scan(&tableName); err != nil {
+ log.Printf("Error scanning table name: %v", err)
+ continue
+ }
+ tableNames = append(tableNames, tableName)
+ }
+
+ // Get columns for each table
+ for _, tableName := range tableNames {
+ columnRows, err := db.Raw(`
+ SELECT column_name, data_type
+ FROM information_schema.columns
+ WHERE table_name = ? AND table_schema = 'main'
+ ORDER BY ordinal_position
+ `, tableName).Rows()
+ if err != nil {
+ log.Printf("Error getting columns for table %s: %v", tableName, err)
+ continue
+ }
+
+ var columns []ColumnInfo
+ for columnRows.Next() {
+ var column ColumnInfo
+ if err := columnRows.Scan(&column.Name, &column.DataType); err != nil {
+ log.Printf("Error scanning column info: %v", err)
+ continue
+ }
+ columns = append(columns, column)
+ }
+ columnRows.Close()
+
+ tables = append(tables, TableInfo{
+ Name: tableName,
+ Columns: columns,
+ })
+ }
+
+ return tables, nil
+}
+
+// inferRelationships analyzes column names to suggest possible relationships
+func (p *DuckDBPlugin) inferRelationships(tables []TableInfo) []*model.Edge {
+ var edges []*model.Edge
+
+ // Create a map of table names to their primary key columns (if identifiable)
+ primaryKeys := make(map[string][]string)
+ for _, table := range tables {
+ for _, column := range table.Columns {
+ // Common patterns for primary keys
+ if strings.EqualFold(column.Name, "id") ||
+ strings.EqualFold(column.Name, table.Name+"_id") ||
+ strings.HasSuffix(strings.ToLower(column.Name), "_id") {
+ primaryKeys[table.Name] = append(primaryKeys[table.Name], column.Name)
+ }
+ }
+ }
+
+ // Look for foreign key relationships based on naming conventions
+ for _, table := range tables {
+ for _, column := range table.Columns {
+ columnLower := strings.ToLower(column.Name)
+
+ // Skip if this looks like a primary key for this table
+ if strings.EqualFold(column.Name, "id") ||
+ strings.EqualFold(column.Name, table.Name+"_id") {
+ continue
+ }
+
+ // Look for foreign key patterns (ends with _id)
+ if strings.HasSuffix(columnLower, "_id") {
+ // Extract the potential referenced table name
+ referencedTable := strings.TrimSuffix(columnLower, "_id")
+
+ // Check if there's a table with this name or similar
+ for _, otherTable := range tables {
+ if strings.EqualFold(otherTable.Name, referencedTable) {
+ // Found a potential relationship
+ edge := &model.Edge{
+ ID: fmt.Sprintf("%s_%s_to_%s", table.Name, column.Name, otherTable.Name),
+ Source: table.Name,
+ Target: otherTable.Name,
+ Type: "foreign_key",
+ Label: fmt.Sprintf("%s.%s", table.Name, column.Name),
+ Metadata: map[string]interface{}{
+ "source_column": column.Name,
+ "type": "inferred_foreign_key",
+ },
+ }
+ edges = append(edges, edge)
+ break
+ }
+ }
+ }
+ }
+ }
+
+ return edges
+}
\ No newline at end of file
diff --git a/core/src/plugins/duckdb/utils.go b/core/src/plugins/duckdb/utils.go
new file mode 100644
index 0000000..f46e79e
--- /dev/null
+++ b/core/src/plugins/duckdb/utils.go
@@ -0,0 +1,404 @@
+/*
+ * Copyright 2025 Clidey, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package duckdb
+
+import (
+ "fmt"
+ "regexp"
+ "strings"
+ "unicode"
+
+ "github.com/clidey/whodb/core/src/engine"
+ "gorm.io/gorm"
+)
+
+func (p *DuckDBPlugin) ConvertStringValueDuringMap(value, columnType string) (interface{}, error) {
+ return value, nil
+}
+
+func (p *DuckDBPlugin) GetPrimaryKeyColQuery() string {
+ return `
+ SELECT
+ column_name as pk_column
+ FROM
+ information_schema.key_column_usage
+ WHERE
+ table_name = ?
+ AND table_schema = 'main'
+ AND constraint_name LIKE '%_pkey'
+ ORDER BY
+ ordinal_position;`
+}
+
+func (p *DuckDBPlugin) GetColTypeQuery() string {
+ return `
+ SELECT
+ column_name AS column_name,
+ data_type AS data_type
+ FROM
+ information_schema.columns
+ WHERE
+ table_name = ?
+ AND table_schema = 'main'
+ ORDER BY
+ ordinal_position;`
+}
+
+// DuckDB reserved keywords that require quoting or are forbidden
+var duckdbReservedKeywords = map[string]bool{
+ "SELECT": true, "FROM": true, "WHERE": true, "INSERT": true, "UPDATE": true, "DELETE": true,
+ "CREATE": true, "DROP": true, "ALTER": true, "TABLE": true, "INDEX": true, "VIEW": true,
+ "DATABASE": true, "SCHEMA": true, "COLUMN": true, "CONSTRAINT": true, "PRIMARY": true,
+ "FOREIGN": true, "KEY": true, "REFERENCES": true, "UNIQUE": true, "NOT": true, "NULL": true,
+ "DEFAULT": true, "CHECK": true, "UNION": true, "JOIN": true, "INNER": true, "LEFT": true,
+ "RIGHT": true, "FULL": true, "OUTER": true, "ON": true, "USING": true, "GROUP": true,
+ "ORDER": true, "BY": true, "HAVING": true, "LIMIT": true, "OFFSET": true, "AS": true,
+ "DISTINCT": true, "ALL": true, "EXISTS": true, "IN": true, "BETWEEN": true, "LIKE": true,
+ "ILIKE": true, "IS": true, "AND": true, "OR": true, "CASE": true, "WHEN": true, "THEN": true,
+ "ELSE": true, "END": true, "CAST": true, "EXTRACT": true, "SUBSTRING": true, "TRIM": true,
+ "COALESCE": true, "NULLIF": true, "GREATEST": true, "LEAST": true, "ARRAY": true, "STRUCT": true,
+ "MAP": true, "UNION": true, "EXCEPT": true, "INTERSECT": true, "WITH": true, "RECURSIVE": true,
+ "RETURNING": true, "CONFLICT": true, "DO": true, "NOTHING": true, "UPSERT": true,
+}
+
+// validateIdentifier validates that an identifier is safe for use in SQL
+func (p *DuckDBPlugin) validateIdentifier(identifier string) error {
+ // Check length limits (DuckDB supports up to 64 characters for identifiers)
+ if len(identifier) == 0 {
+ return fmt.Errorf("identifier cannot be empty")
+ }
+ if len(identifier) > 64 {
+ return fmt.Errorf("identifier exceeds maximum length of 64 characters")
+ }
+
+ // Check for null bytes and other dangerous control characters
+ if strings.Contains(identifier, "\x00") {
+ return fmt.Errorf("identifier contains null byte")
+ }
+
+ // Check for dangerous characters that could enable injection
+ for _, char := range identifier {
+ if char < 32 && char != 9 && char != 10 && char != 13 { // Allow tab, newline, carriage return
+ return fmt.Errorf("identifier contains invalid control character: %U", char)
+ }
+ }
+
+ // Check for SQL injection patterns
+ suspiciousPatterns := []string{
+ "--", "/*", "*/", ";", "xp_", "sp_", "@@", "EXEC", "EXECUTE",
+ "SCRIPT", "JAVASCRIPT", "VBSCRIPT", "ONLOAD", "ONERROR",
+ }
+ upperIdentifier := strings.ToUpper(identifier)
+ for _, pattern := range suspiciousPatterns {
+ if strings.Contains(upperIdentifier, pattern) {
+ return fmt.Errorf("identifier contains suspicious pattern: %s", pattern)
+ }
+ }
+
+ return nil
+}
+
+// EscapeSpecificIdentifier properly escapes and validates identifiers for DuckDB
+func (p *DuckDBPlugin) EscapeSpecificIdentifier(identifier string) string {
+ // First validate the identifier for security
+ if err := p.validateIdentifier(identifier); err != nil {
+ // If validation fails, create a safe fallback identifier
+ // This prevents injection while maintaining functionality
+ safeIdentifier := p.createSafeIdentifier(identifier)
+ return safeIdentifier
+ }
+
+ // Check if identifier needs quoting (contains special chars or is reserved)
+ needsQuoting := p.identifierNeedsQuoting(identifier)
+
+ if needsQuoting {
+ // Escape double quotes by doubling them, then wrap in quotes
+ escaped := strings.Replace(identifier, "\"", "\"\"", -1)
+ return "\"" + escaped + "\""
+ }
+
+ // Return identifier as-is if it doesn't need quoting
+ return identifier
+}
+
+// identifierNeedsQuoting determines if an identifier needs to be quoted
+func (p *DuckDBPlugin) identifierNeedsQuoting(identifier string) bool {
+ // Check if it's a reserved keyword
+ if duckdbReservedKeywords[strings.ToUpper(identifier)] {
+ return true
+ }
+
+ // Check if it starts with a number
+ if len(identifier) > 0 && unicode.IsDigit(rune(identifier[0])) {
+ return true
+ }
+
+ // Check if it contains special characters that require quoting
+ for _, char := range identifier {
+ if !unicode.IsLetter(char) && !unicode.IsDigit(char) && char != '_' {
+ return true
+ }
+ }
+
+ return false
+}
+
+// createSafeIdentifier creates a safe fallback identifier when validation fails
+func (p *DuckDBPlugin) createSafeIdentifier(original string) string {
+ // Create a safe identifier by removing dangerous characters
+ reg := regexp.MustCompile(`[^a-zA-Z0-9_]`)
+ safe := reg.ReplaceAllString(original, "_")
+
+ // Ensure it doesn't start with a number
+ if len(safe) > 0 && unicode.IsDigit(rune(safe[0])) {
+ safe = "col_" + safe
+ }
+
+ // Ensure it's not empty
+ if safe == "" {
+ safe = "safe_identifier"
+ }
+
+ // Ensure it's not too long
+ if len(safe) > 64 {
+ safe = safe[:64]
+ }
+
+ // Ensure it's not a reserved keyword
+ if duckdbReservedKeywords[strings.ToUpper(safe)] {
+ safe = safe + "_col"
+ }
+
+ // Always quote safe identifiers since they may have been modified
+ return "\"" + safe + "\""
+}
+
+// GetGraphQueryDB returns the database connection for graph queries
+func (p *DuckDBPlugin) GetGraphQueryDB(db *gorm.DB, schema string) *gorm.DB {
+ // For DuckDB, we don't need schema-specific handling since it uses 'main' schema
+ return db
+}
+
+// GetCreateTableQuery generates a CREATE TABLE statement for DuckDB
+func (p *DuckDBPlugin) GetCreateTableQuery(schema string, storageUnit string, columns []engine.Record) string {
+ var columnDefs []string
+
+ for _, column := range columns {
+ // Use secure identifier escaping (handles quoting automatically)
+ columnName := p.EscapeSpecificIdentifier(column.Key)
+ columnType := column.Value
+
+ // Validate and normalize column type for DuckDB
+ normalizedType := p.normalizeColumnType(columnType)
+
+ // EscapeSpecificIdentifier now handles quoting, so don't add extra quotes
+ columnDefs = append(columnDefs, fmt.Sprintf("%s %s", columnName, normalizedType))
+ }
+
+ // Use secure identifier escaping for table name (handles quoting automatically)
+ tableName := p.EscapeSpecificIdentifier(storageUnit)
+ return fmt.Sprintf("CREATE TABLE %s (%s)", tableName, strings.Join(columnDefs, ", "))
+}
+
+// normalizeColumnType ensures the column type is valid for DuckDB and prevents injection
+func (p *DuckDBPlugin) normalizeColumnType(columnType string) string {
+ // First sanitize the input to prevent SQL injection through column types
+ columnType = strings.TrimSpace(columnType)
+
+ // Enhanced security validation with comprehensive pattern detection
+ if err := p.validateColumnType(columnType); err != nil {
+ // If validation fails, return safe default and log the attempt
+ return "VARCHAR"
+ }
+
+ upperType := strings.ToUpper(columnType)
+
+ // Map common SQL types to DuckDB equivalents (simple types first)
+ switch upperType {
+ case "INT", "INT4":
+ return "INTEGER"
+ case "INT8":
+ return "BIGINT"
+ case "INT2":
+ return "SMALLINT"
+ case "INT1":
+ return "TINYINT"
+ case "FLOAT4":
+ return "REAL"
+ case "FLOAT8":
+ return "DOUBLE"
+ case "BOOL":
+ return "BOOLEAN"
+ case "STRING":
+ return "VARCHAR"
+ default:
+ // Check if it's a valid DuckDB type with parameters (e.g., VARCHAR(255))
+ validatedType := p.parseAndValidateParameterizedType(upperType)
+ if validatedType != "" {
+ return validatedType
+ }
+
+ // Check simple types without parameters
+ if p.isValidSimpleDuckDBType(upperType) {
+ return upperType
+ }
+
+ // Default to VARCHAR for unknown types
+ return "VARCHAR"
+ }
+}
+
+// validateColumnType performs comprehensive security validation on column types
+func (p *DuckDBPlugin) validateColumnType(columnType string) error {
+ // Check length limits
+ if len(columnType) == 0 {
+ return fmt.Errorf("column type cannot be empty")
+ }
+ if len(columnType) > 100 {
+ return fmt.Errorf("column type exceeds maximum length")
+ }
+
+ // Check for null bytes and dangerous control characters
+ if strings.Contains(columnType, "\x00") {
+ return fmt.Errorf("column type contains null byte")
+ }
+
+ // Check for dangerous characters that could enable injection
+ for _, char := range columnType {
+ if char < 32 && char != 9 && char != 10 && char != 13 {
+ return fmt.Errorf("column type contains invalid control character")
+ }
+ }
+
+ // Enhanced suspicious pattern detection
+ suspiciousPatterns := []string{
+ "--", "/*", "*/", ";", "DROP", "DELETE", "INSERT", "UPDATE", "CREATE", "ALTER",
+ "EXEC", "EXECUTE", "SCRIPT", "XP_", "SP_", "@@", "UNION", "SELECT", "FROM",
+ "WHERE", "ORDER", "GROUP", "HAVING", "DECLARE", "SET", "GRANT", "REVOKE",
+ "TRUNCATE", "MERGE", "CALL", "RETURN", "THROW", "TRY", "CATCH", "BEGIN",
+ "END", "IF", "ELSE", "WHILE", "FOR", "CURSOR", "PROCEDURE", "FUNCTION",
+ "TRIGGER", "VIEW", "SCHEMA", "DATABASE", "BACKUP", "RESTORE", "SHUTDOWN",
+ }
+
+ upperType := strings.ToUpper(columnType)
+ for _, pattern := range suspiciousPatterns {
+ if strings.Contains(upperType, pattern) {
+ return fmt.Errorf("column type contains suspicious pattern: %s", pattern)
+ }
+ }
+
+ // Validate that column type only contains strictly allowed characters
+ // More restrictive: only letters, numbers, parentheses, spaces, comma, underscore
+ allowedChars := regexp.MustCompile(`^[a-zA-Z0-9()\s,_]+$`)
+ if !allowedChars.MatchString(columnType) {
+ return fmt.Errorf("column type contains invalid characters")
+ }
+
+ return nil
+}
+
+// parseAndValidateParameterizedType handles types with parameters like VARCHAR(255) or DECIMAL(10,2)
+func (p *DuckDBPlugin) parseAndValidateParameterizedType(typeStr string) string {
+ // Match pattern: TYPE(param1) or TYPE(param1,param2)
+ paramPattern := regexp.MustCompile(`^([A-Z]+)\s*\(\s*(\d+)(?:\s*,\s*(\d+))?\s*\)$`)
+ matches := paramPattern.FindStringSubmatch(typeStr)
+
+ if len(matches) == 0 {
+ return "" // Not a parameterized type
+ }
+
+ baseType := matches[1]
+ param1 := matches[2]
+ param2 := ""
+ if len(matches) > 3 && matches[3] != "" {
+ param2 = matches[3]
+ }
+
+ // Validate base type is allowed
+ if !p.isValidSimpleDuckDBType(baseType) {
+ return ""
+ }
+
+ // Validate parameters are within reasonable bounds
+ if !p.validateTypeParameters(baseType, param1, param2) {
+ return ""
+ }
+
+ // Reconstruct the validated type
+ if param2 != "" {
+ return fmt.Sprintf("%s(%s,%s)", baseType, param1, param2)
+ } else {
+ return fmt.Sprintf("%s(%s)", baseType, param1)
+ }
+}
+
+// validateTypeParameters ensures type parameters are within valid ranges
+func (p *DuckDBPlugin) validateTypeParameters(baseType, param1, param2 string) bool {
+ // Convert parameters to integers for validation
+ p1, err1 := fmt.Atoi(param1)
+ if err1 != nil {
+ return false
+ }
+
+ var p2 int
+ var err2 error
+ if param2 != "" {
+ p2, err2 = fmt.Atoi(param2)
+ if err2 != nil {
+ return false
+ }
+ }
+
+ // Validate parameter ranges based on type
+ switch baseType {
+ case "VARCHAR", "CHAR", "TEXT":
+ // Length parameter should be reasonable (1 to 65535)
+ return p1 > 0 && p1 <= 65535 && param2 == ""
+ case "DECIMAL", "NUMERIC":
+ // Precision and scale parameters
+ if param2 == "" {
+ // Only precision specified
+ return p1 > 0 && p1 <= 38
+ } else {
+ // Both precision and scale
+ return p1 > 0 && p1 <= 38 && p2 >= 0 && p2 <= p1
+ }
+ case "FLOAT", "DOUBLE", "REAL":
+ // Precision parameter for float types
+ return p1 > 0 && p1 <= 53 && param2 == ""
+ default:
+ // For other types, be conservative and don't allow parameters
+ return false
+ }
+}
+
+// isValidSimpleDuckDBType checks if a type string is a valid simple DuckDB type (no parameters)
+func (p *DuckDBPlugin) isValidSimpleDuckDBType(typeStr string) bool {
+ // Comprehensive list of valid DuckDB base types
+ validTypes := map[string]bool{
+ "BOOLEAN": true, "TINYINT": true, "SMALLINT": true, "INTEGER": true, "BIGINT": true,
+ "HUGEINT": true, "UTINYINT": true, "USMALLINT": true, "UINTEGER": true, "UBIGINT": true,
+ "REAL": true, "DOUBLE": true, "DECIMAL": true, "NUMERIC": true, "VARCHAR": true,
+ "CHAR": true, "TEXT": true, "STRING": true, "BLOB": true, "BYTEA": true,
+ "DATE": true, "TIME": true, "TIMESTAMP": true, "TIMESTAMPTZ": true, "INTERVAL": true,
+ "UUID": true, "JSON": true, "ARRAY": true, "LIST": true, "STRUCT": true, "MAP": true,
+ "UNION": true, "ENUM": true,
+ }
+
+ return validTypes[typeStr]
+}
+
diff --git a/core/src/plugins/gorm/db.go b/core/src/plugins/gorm/db.go
index 1c5431d..4ae55de 100644
--- a/core/src/plugins/gorm/db.go
+++ b/core/src/plugins/gorm/db.go
@@ -37,6 +37,12 @@ const (
readOnlyKey = "Readonly"
debugKey = "Debug"
connectionTimeoutKey = "Connection Timeout"
+
+ // DuckDB specific connection options
+ duckdbAccessModeKey = "Access Mode" // read_only, read_write
+ duckdbThreadsKey = "Threads" // number of threads for parallel execution
+ duckdbMaxMemoryKey = "Max Memory" // maximum memory usage (e.g., "1GB")
+ duckdbTempDirectoryKey = "Temp Directory" // temporary directory for intermediate results
)
// DefaultDatabasePorts maps database systems to their standard default ports
@@ -45,6 +51,7 @@ var defaultDatabasePorts = map[engine.DatabaseType]string{
engine.DatabaseType_MariaDB: "3306",
engine.DatabaseType_Postgres: "5432",
engine.DatabaseType_Sqlite3: "0", // SQLite is file-based, no port
+ engine.DatabaseType_DuckDB: "0", // DuckDB is file-based, no port
engine.DatabaseType_ClickHouse: "9000", // TCP port (HTTP port is 8123)
engine.DatabaseType_MongoDB: "27017",
engine.DatabaseType_ElasticSearch: "9200", // HTTP port (Transport port is 9300)
@@ -72,6 +79,12 @@ type ConnectionInput struct {
ConnectionTimeout int
+ //duckdb
+ DuckDBAccessMode string
+ DuckDBThreads string
+ DuckDBMaxMemory string
+ DuckDBTempDirectory string
+
ExtraOptions map[string]string `validate:"omitnil"`
}
@@ -111,6 +124,12 @@ func (p *GormPlugin) ParseConnectionConfig(config *engine.PluginConfig) (*Connec
return nil, err
}
+ //duckdb specific
+ duckdbAccessMode := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbAccessModeKey, "read_write")
+ duckdbThreads := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbThreadsKey, "")
+ duckdbMaxMemory := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbMaxMemoryKey, "")
+ duckdbTempDirectory := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbTempDirectoryKey, "")
+
input := &ConnectionInput{
Username: url.PathEscape(config.Credentials.Username),
Password: url.PathEscape(config.Credentials.Password),
@@ -125,6 +144,10 @@ func (p *GormPlugin) ParseConnectionConfig(config *engine.PluginConfig) (*Connec
ReadOnly: readOnly,
Debug: debug,
ConnectionTimeout: connectionTimeout,
+ DuckDBAccessMode: duckdbAccessMode,
+ DuckDBThreads: duckdbThreads,
+ DuckDBMaxMemory: duckdbMaxMemory,
+ DuckDBTempDirectory: duckdbTempDirectory,
}
// if this config is a pre-configured profile, then allow reading of additional params
@@ -132,7 +155,7 @@ func (p *GormPlugin) ParseConnectionConfig(config *engine.PluginConfig) (*Connec
params := make(map[string]string)
for _, record := range config.Credentials.Advanced {
switch record.Key {
- case portKey, parseTimeKey, locKey, allowClearTextPasswordsKey, sslModeKey, httpProtocolKey, readOnlyKey, debugKey, connectionTimeoutKey:
+ case portKey, parseTimeKey, locKey, allowClearTextPasswordsKey, sslModeKey, httpProtocolKey, readOnlyKey, debugKey, connectionTimeoutKey, duckdbAccessModeKey, duckdbThreadsKey, duckdbMaxMemoryKey, duckdbTempDirectoryKey:
continue
default:
params[record.Key] = url.QueryEscape(record.Value) // todo: this may break for postgres
diff --git a/core/src/src.go b/core/src/src.go
index 713540f..c6114c2 100644
--- a/core/src/src.go
+++ b/core/src/src.go
@@ -19,6 +19,7 @@ package src
import (
"fmt"
"github.com/clidey/whodb/core/src/plugins/clickhouse"
+ "github.com/clidey/whodb/core/src/plugins/duckdb"
"github.com/clidey/whodb/core/src/plugins/elasticsearch"
"github.com/clidey/whodb/core/src/plugins/mongodb"
"github.com/clidey/whodb/core/src/plugins/mysql"
@@ -38,6 +39,7 @@ func InitializeEngine() *engine.Engine {
MainEngine.RegistryPlugin(mysql.NewMySQLPlugin())
MainEngine.RegistryPlugin(mysql.NewMyMariaDBPlugin())
MainEngine.RegistryPlugin(sqlite3.NewSqlite3Plugin())
+ MainEngine.RegistryPlugin(duckdb.NewDuckDBPlugin())
MainEngine.RegistryPlugin(mongodb.NewMongoDBPlugin())
MainEngine.RegistryPlugin(redis.NewRedisPlugin())
MainEngine.RegistryPlugin(elasticsearch.NewElasticSearchPlugin())
diff --git a/frontend/src/components/icons.tsx b/frontend/src/components/icons.tsx
index d8c3fe7..5729248 100644
--- a/frontend/src/components/icons.tsx
+++ b/frontend/src/components/icons.tsx
@@ -193,6 +193,11 @@ export const Icons = {
,
+ DuckDB: ,
Anthropic: ,
OpenAICompatible: