diff --git a/core/go.mod b/core/go.mod index 7207ace..7b6fa86 100644 --- a/core/go.mod +++ b/core/go.mod @@ -21,6 +21,7 @@ require ( gorm.io/driver/postgres v1.5.11 gorm.io/driver/sqlite v1.5.7 gorm.io/gorm v1.25.12 + github.com/alifiroozi80/duckdb v1.1.1 ) require ( diff --git a/core/graph/schema.graphqls b/core/graph/schema.graphqls index 6e8b966..cb609af 100644 --- a/core/graph/schema.graphqls +++ b/core/graph/schema.graphqls @@ -7,6 +7,7 @@ enum DatabaseType { ElasticSearch, MariaDB, ClickHouse, + DuckDB, } type Column { diff --git a/core/src/engine/engine.go b/core/src/engine/engine.go index af1dcd2..10133c2 100644 --- a/core/src/engine/engine.go +++ b/core/src/engine/engine.go @@ -23,6 +23,7 @@ const ( DatabaseType_MySQL = "MySQL" DatabaseType_MariaDB = "MariaDB" DatabaseType_Sqlite3 = "Sqlite3" + DatabaseType_DuckDB = "DuckDB" DatabaseType_MongoDB = "MongoDB" DatabaseType_Redis = "Redis" DatabaseType_ElasticSearch = "ElasticSearch" diff --git a/core/src/plugins/duckdb/add.go b/core/src/plugins/duckdb/add.go new file mode 100644 index 0000000..56ef28e --- /dev/null +++ b/core/src/plugins/duckdb/add.go @@ -0,0 +1,45 @@ +/* + * Copyright 2025 Clidey, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package duckdb + +import ( + "fmt" + + "github.com/clidey/whodb/core/src/engine" + "github.com/clidey/whodb/core/src/plugins" + "gorm.io/gorm" +) + +func (p *DuckDBPlugin) AddStorageUnitRow(config *engine.PluginConfig, schema string, storageUnit string, values []engine.Record) error { + return plugins.WithConnection(config, p.DB, func(db *gorm.DB) error { + tableName := p.FormTableName(schema, storageUnit) + + data, err := p.ConvertRecordValuesToMap(values) + if err != nil { + return err + } + + return db.Table(tableName).Create(data).Error + }) +} + +func (p *DuckDBPlugin) AddStorageUnit(config *engine.PluginConfig, schema string, storageUnit string, columns []engine.Record) error { + return plugins.WithConnection(config, p.DB, func(db *gorm.DB) error { + query := p.GetCreateTableQuery(schema, storageUnit, columns) + return db.Exec(query).Error + }) +} \ No newline at end of file diff --git a/core/src/plugins/duckdb/db.go b/core/src/plugins/duckdb/db.go new file mode 100644 index 0000000..05a61e5 --- /dev/null +++ b/core/src/plugins/duckdb/db.go @@ -0,0 +1,185 @@ +/* + * Copyright 2025 Clidey, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package duckdb + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/alifiroozi80/duckdb" + "github.com/clidey/whodb/core/src/engine" + "github.com/clidey/whodb/core/src/env" + "gorm.io/gorm" +) + +func getDefaultDirectory() string { + directory := "/db/" + if env.IsDevelopment { + directory = "tmp/" + } + return directory +} + +var errDoesNotExist = errors.New("unauthorized or the database doesn't exist") + +func (p *DuckDBPlugin) DB(config *engine.PluginConfig) (*gorm.DB, error) { + connectionInput, err := p.ParseConnectionConfig(config) + if err != nil { + return nil, err + } + + database := connectionInput.Database + fileNameDatabase := filepath.Join(getDefaultDirectory(), database) + + // Security check: ensure the file path is within the allowed directory + if !strings.HasPrefix(fileNameDatabase, getDefaultDirectory()) { + return nil, errDoesNotExist + } + + // Check if file exists + if _, err := os.Stat(fileNameDatabase); errors.Is(err, os.ErrNotExist) { + return nil, errDoesNotExist + } + + // Validate file extension (accept .duckdb, .ddb, .db as requested) + ext := strings.ToLower(filepath.Ext(fileNameDatabase)) + if ext != ".duckdb" && ext != ".ddb" && ext != ".db" { + return nil, fmt.Errorf("unsupported file extension: %s. Only .duckdb, .ddb, and .db files are supported", ext) + } + + // Create connection string for DuckDB + // DuckDB supports various connection options for performance and behavior tuning + dsn := fileNameDatabase + + // Add DuckDB-specific connection options + params := make([]string, 0) + + // Access mode: read_only or read_write + if connectionInput.DuckDBAccessMode != "" && connectionInput.DuckDBAccessMode != "read_write" { + params = append(params, fmt.Sprintf("access_mode=%s", connectionInput.DuckDBAccessMode)) + } + + // Thread configuration for parallel execution + if connectionInput.DuckDBThreads != "" { + params = append(params, fmt.Sprintf("threads=%s", connectionInput.DuckDBThreads)) + } + + // Memory limit configuration (e.g., "1GB", "512MB") + if connectionInput.DuckDBMaxMemory != "" { + params = append(params, fmt.Sprintf("max_memory=%s", connectionInput.DuckDBMaxMemory)) + } + + // Temporary directory for intermediate results + if connectionInput.DuckDBTempDirectory != "" { + params = append(params, fmt.Sprintf("temp_directory=%s", connectionInput.DuckDBTempDirectory)) + } + + // Add any extra connection options if needed + if connectionInput.ExtraOptions != nil && len(connectionInput.ExtraOptions) > 0 { + for key, value := range connectionInput.ExtraOptions { + params = append(params, fmt.Sprintf("%s=%s", key, value)) + } + } + + // Build final DSN + if len(params) > 0 { + dsn += "?" + strings.Join(params, "&") + } + + db, err := gorm.Open(duckdb.Open(dsn), &gorm.Config{}) + if err != nil { + return nil, fmt.Errorf("failed to connect to DuckDB: %w", err) + } + + // After successful connection, enable CSV/Parquet reading from the same directory + if err := p.setupFileAccess(db, fileNameDatabase); err != nil { + return nil, fmt.Errorf("failed to setup file access: %w", err) + } + + return db, nil +} + +// setupFileAccess configures DuckDB to allow reading CSV and Parquet files from the same directory as the database +func (p *DuckDBPlugin) setupFileAccess(db *gorm.DB, dbFilePath string) error { + // Get the directory containing the database file + dbDir := filepath.Dir(dbFilePath) + + // Enable the httpfs extension for reading files (optional, for http/https URLs) + // This is disabled by default for security + + // Create views or helper functions for reading CSV/Parquet files from the same directory + // This is done by creating a function that validates file paths + + // For now, we'll just validate that the directory is accessible + // The actual CSV/Parquet reading will be done through raw SQL queries + // that we'll validate to ensure they only access files in the same directory + + _, err := os.Stat(dbDir) + if err != nil { + return fmt.Errorf("database directory not accessible: %w", err) + } + + return nil +} + +// ValidateFileAccess ensures that a file path is within the allowed directory (same as database) +func (p *DuckDBPlugin) ValidateFileAccess(dbFilePath, requestedFilePath string) error { + dbDir := filepath.Dir(dbFilePath) + + // Clean the requested file path + cleanPath := filepath.Clean(requestedFilePath) + + // If it's not an absolute path, make it relative to the database directory + if !filepath.IsAbs(cleanPath) { + cleanPath = filepath.Join(dbDir, cleanPath) + } + + // Get absolute path of the database directory for comparison + dbDirAbs, err := filepath.Abs(dbDir) + if err != nil { + return fmt.Errorf("cannot resolve database directory: %w", err) + } + + // SECURITY FIX: Resolve symlinks BEFORE checking directory containment + // This prevents symlink-based directory traversal attacks + resolvedPath, err := filepath.EvalSymlinks(cleanPath) + if err != nil { + // If symlink resolution fails, the file might not exist or be inaccessible + return fmt.Errorf("cannot resolve file path (file may not exist or be inaccessible): %w", err) + } + + // Ensure the resolved path is within the database directory + if !strings.HasPrefix(resolvedPath, dbDirAbs+string(filepath.Separator)) && resolvedPath != dbDirAbs { + return fmt.Errorf("file access denied: file must be in the same directory as the database") + } + + // Check if the file exists (should exist since EvalSymlinks succeeded, but double-check) + if _, err := os.Stat(resolvedPath); os.IsNotExist(err) { + return fmt.Errorf("file does not exist: %s", resolvedPath) + } + + // Check if it's a CSV or Parquet file + ext := strings.ToLower(filepath.Ext(resolvedPath)) + if ext != ".csv" && ext != ".parquet" { + return fmt.Errorf("unsupported file type: %s. Only .csv and .parquet files are supported", ext) + } + + return nil +} \ No newline at end of file diff --git a/core/src/plugins/duckdb/duckdb.go b/core/src/plugins/duckdb/duckdb.go new file mode 100644 index 0000000..6d81bdb --- /dev/null +++ b/core/src/plugins/duckdb/duckdb.go @@ -0,0 +1,162 @@ +/* + * Copyright 2025 Clidey, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package duckdb + +import ( + "database/sql" + "errors" + "fmt" + "log" + "os" + "path/filepath" + "strings" + + "github.com/clidey/whodb/core/src/engine" + "github.com/clidey/whodb/core/src/plugins" + gorm_plugin "github.com/clidey/whodb/core/src/plugins/gorm" + mapset "github.com/deckarep/golang-set/v2" + "gorm.io/gorm" +) + +var ( + supportedColumnDataTypes = mapset.NewSet( + "BOOLEAN", "TINYINT", "SMALLINT", "INTEGER", "BIGINT", "UTINYINT", "USMALLINT", "UINTEGER", "UBIGINT", "HUGEINT", + "REAL", "DOUBLE", "DECIMAL", "NUMERIC", + "VARCHAR", "CHAR", "TEXT", "STRING", "BPCHAR", + "BYTEA", "BLOB", "VARBINARY", "BINARY", + "DATE", "TIME", "TIMESTAMP", "TIMESTAMPTZ", "INTERVAL", + "UUID", "JSON", + "ARRAY", "LIST", "STRUCT", "MAP", "UNION", + "BIT", "BITSTRING", + ) +) + +type DuckDBPlugin struct { + gorm_plugin.GormPlugin +} + +func (p *DuckDBPlugin) GetSupportedColumnDataTypes() mapset.Set[string] { + return supportedColumnDataTypes +} + +func (p *DuckDBPlugin) GetAllSchemasQuery() string { + return "" +} + +func (p *DuckDBPlugin) FormTableName(schema string, storageUnit string) string { + return storageUnit +} + +func (p *DuckDBPlugin) GetDatabases(config *engine.PluginConfig) ([]string, error) { + directory := getDefaultDirectory() + entries, err := os.ReadDir(directory) + if err != nil { + return nil, err + } + + databases := []string{} + for _, e := range entries { + if !e.IsDir() { + fileName := e.Name() + // Accept .duckdb, .ddb, and .db files as requested + if strings.HasSuffix(fileName, ".duckdb") || strings.HasSuffix(fileName, ".ddb") || strings.HasSuffix(fileName, ".db") { + databases = append(databases, fileName) + } + } + } + + return databases, nil +} + +func (p *DuckDBPlugin) GetAllSchemas(config *engine.PluginConfig) ([]string, error) { + return nil, errors.ErrUnsupported +} + +func (p *DuckDBPlugin) GetTableInfoQuery() string { + return ` + SELECT + table_name, + table_type + FROM + information_schema.tables + WHERE + table_schema = 'main' + AND table_type IN ('BASE TABLE', 'LOCAL TEMPORARY') + ` +} + +func (p *DuckDBPlugin) GetTableNameAndAttributes(rows *sql.Rows, db *gorm.DB) (string, []engine.Record) { + var tableName, tableType string + if err := rows.Scan(&tableName, &tableType); err != nil { + log.Printf("Error scanning table info: %v", err) + return "", nil + } + + var rowCount int64 + rowCountRow := db.Raw(fmt.Sprintf("SELECT COUNT(*) FROM %s", tableName)).Row() + err := rowCountRow.Scan(&rowCount) + if err != nil { + log.Printf("Error getting row count for table %s: %v", tableName, err) + return "", nil + } + + attributes := []engine.Record{ + {Key: "Type", Value: tableType}, + {Key: "Count", Value: fmt.Sprintf("%d", rowCount)}, + } + + return tableName, attributes +} + +func (p *DuckDBPlugin) GetSchemaTableQuery() string { + return ` + SELECT + table_name AS TABLE_NAME, + column_name AS COLUMN_NAME, + data_type AS DATA_TYPE + FROM + information_schema.columns + WHERE + table_schema = 'main' + ORDER BY + table_name, ordinal_position + ` +} + +func (p *DuckDBPlugin) executeRawSQL(config *engine.PluginConfig, query string, params ...interface{}) (*engine.GetRowsResult, error) { + return plugins.WithConnection(config, p.DB, func(db *gorm.DB) (*engine.GetRowsResult, error) { + rows, err := db.Raw(query, params...).Rows() + if err != nil { + return nil, err + } + defer rows.Close() + + return p.ConvertRawToRows(rows) + }) +} + +func (p *DuckDBPlugin) RawExecute(config *engine.PluginConfig, query string) (*engine.GetRowsResult, error) { + return p.executeRawSQL(config, query) +} + +func NewDuckDBPlugin() *engine.Plugin { + plugin := &DuckDBPlugin{} + plugin.Type = engine.DatabaseType_DuckDB + plugin.PluginFunctions = plugin + plugin.GormPluginFunctions = plugin + return &plugin.Plugin +} \ No newline at end of file diff --git a/core/src/plugins/duckdb/graph.go b/core/src/plugins/duckdb/graph.go new file mode 100644 index 0000000..77d5b9b --- /dev/null +++ b/core/src/plugins/duckdb/graph.go @@ -0,0 +1,190 @@ +/* + * Copyright 2025 Clidey, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package duckdb + +import ( + "database/sql" + "fmt" + "log" + "strings" + + "github.com/clidey/whodb/core/graph/model" + "github.com/clidey/whodb/core/src/engine" + "github.com/clidey/whodb/core/src/plugins" + "gorm.io/gorm" +) + +func (p *DuckDBPlugin) GetGraph(config *engine.PluginConfig, schema string) (*model.Graph, error) { + return plugins.WithConnection(config, p.DB, func(db *gorm.DB) (*model.Graph, error) { + tables, err := p.getTablesWithRelationships(db, schema) + if err != nil { + return nil, err + } + + nodes := make([]*model.Node, 0, len(tables)) + edges := make([]*model.Edge, 0) + + for _, table := range tables { + node := &model.Node{ + ID: table.Name, + Label: table.Name, + Type: "table", + Metadata: map[string]interface{}{ + "schema": schema, + "type": "table", + }, + } + nodes = append(nodes, node) + } + + // DuckDB doesn't have built-in foreign key constraints like traditional databases + // but we can analyze column names and types to suggest relationships + edges = p.inferRelationships(tables) + + return &model.Graph{ + Nodes: nodes, + Edges: edges, + }, nil + }) +} + +type TableInfo struct { + Name string + Columns []ColumnInfo +} + +type ColumnInfo struct { + Name string + DataType string +} + +func (p *DuckDBPlugin) getTablesWithRelationships(db *gorm.DB, schema string) ([]TableInfo, error) { + var tables []TableInfo + + // Get all tables + tableRows, err := db.Raw(` + SELECT table_name + FROM information_schema.tables + WHERE table_schema = 'main' + AND table_type = 'BASE TABLE' + `).Rows() + if err != nil { + return nil, err + } + defer tableRows.Close() + + var tableNames []string + for tableRows.Next() { + var tableName string + if err := tableRows.Scan(&tableName); err != nil { + log.Printf("Error scanning table name: %v", err) + continue + } + tableNames = append(tableNames, tableName) + } + + // Get columns for each table + for _, tableName := range tableNames { + columnRows, err := db.Raw(` + SELECT column_name, data_type + FROM information_schema.columns + WHERE table_name = ? AND table_schema = 'main' + ORDER BY ordinal_position + `, tableName).Rows() + if err != nil { + log.Printf("Error getting columns for table %s: %v", tableName, err) + continue + } + + var columns []ColumnInfo + for columnRows.Next() { + var column ColumnInfo + if err := columnRows.Scan(&column.Name, &column.DataType); err != nil { + log.Printf("Error scanning column info: %v", err) + continue + } + columns = append(columns, column) + } + columnRows.Close() + + tables = append(tables, TableInfo{ + Name: tableName, + Columns: columns, + }) + } + + return tables, nil +} + +// inferRelationships analyzes column names to suggest possible relationships +func (p *DuckDBPlugin) inferRelationships(tables []TableInfo) []*model.Edge { + var edges []*model.Edge + + // Create a map of table names to their primary key columns (if identifiable) + primaryKeys := make(map[string][]string) + for _, table := range tables { + for _, column := range table.Columns { + // Common patterns for primary keys + if strings.EqualFold(column.Name, "id") || + strings.EqualFold(column.Name, table.Name+"_id") || + strings.HasSuffix(strings.ToLower(column.Name), "_id") { + primaryKeys[table.Name] = append(primaryKeys[table.Name], column.Name) + } + } + } + + // Look for foreign key relationships based on naming conventions + for _, table := range tables { + for _, column := range table.Columns { + columnLower := strings.ToLower(column.Name) + + // Skip if this looks like a primary key for this table + if strings.EqualFold(column.Name, "id") || + strings.EqualFold(column.Name, table.Name+"_id") { + continue + } + + // Look for foreign key patterns (ends with _id) + if strings.HasSuffix(columnLower, "_id") { + // Extract the potential referenced table name + referencedTable := strings.TrimSuffix(columnLower, "_id") + + // Check if there's a table with this name or similar + for _, otherTable := range tables { + if strings.EqualFold(otherTable.Name, referencedTable) { + // Found a potential relationship + edge := &model.Edge{ + ID: fmt.Sprintf("%s_%s_to_%s", table.Name, column.Name, otherTable.Name), + Source: table.Name, + Target: otherTable.Name, + Type: "foreign_key", + Label: fmt.Sprintf("%s.%s", table.Name, column.Name), + Metadata: map[string]interface{}{ + "source_column": column.Name, + "type": "inferred_foreign_key", + }, + } + edges = append(edges, edge) + break + } + } + } + } + } + + return edges +} \ No newline at end of file diff --git a/core/src/plugins/duckdb/utils.go b/core/src/plugins/duckdb/utils.go new file mode 100644 index 0000000..f46e79e --- /dev/null +++ b/core/src/plugins/duckdb/utils.go @@ -0,0 +1,404 @@ +/* + * Copyright 2025 Clidey, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package duckdb + +import ( + "fmt" + "regexp" + "strings" + "unicode" + + "github.com/clidey/whodb/core/src/engine" + "gorm.io/gorm" +) + +func (p *DuckDBPlugin) ConvertStringValueDuringMap(value, columnType string) (interface{}, error) { + return value, nil +} + +func (p *DuckDBPlugin) GetPrimaryKeyColQuery() string { + return ` + SELECT + column_name as pk_column + FROM + information_schema.key_column_usage + WHERE + table_name = ? + AND table_schema = 'main' + AND constraint_name LIKE '%_pkey' + ORDER BY + ordinal_position;` +} + +func (p *DuckDBPlugin) GetColTypeQuery() string { + return ` + SELECT + column_name AS column_name, + data_type AS data_type + FROM + information_schema.columns + WHERE + table_name = ? + AND table_schema = 'main' + ORDER BY + ordinal_position;` +} + +// DuckDB reserved keywords that require quoting or are forbidden +var duckdbReservedKeywords = map[string]bool{ + "SELECT": true, "FROM": true, "WHERE": true, "INSERT": true, "UPDATE": true, "DELETE": true, + "CREATE": true, "DROP": true, "ALTER": true, "TABLE": true, "INDEX": true, "VIEW": true, + "DATABASE": true, "SCHEMA": true, "COLUMN": true, "CONSTRAINT": true, "PRIMARY": true, + "FOREIGN": true, "KEY": true, "REFERENCES": true, "UNIQUE": true, "NOT": true, "NULL": true, + "DEFAULT": true, "CHECK": true, "UNION": true, "JOIN": true, "INNER": true, "LEFT": true, + "RIGHT": true, "FULL": true, "OUTER": true, "ON": true, "USING": true, "GROUP": true, + "ORDER": true, "BY": true, "HAVING": true, "LIMIT": true, "OFFSET": true, "AS": true, + "DISTINCT": true, "ALL": true, "EXISTS": true, "IN": true, "BETWEEN": true, "LIKE": true, + "ILIKE": true, "IS": true, "AND": true, "OR": true, "CASE": true, "WHEN": true, "THEN": true, + "ELSE": true, "END": true, "CAST": true, "EXTRACT": true, "SUBSTRING": true, "TRIM": true, + "COALESCE": true, "NULLIF": true, "GREATEST": true, "LEAST": true, "ARRAY": true, "STRUCT": true, + "MAP": true, "UNION": true, "EXCEPT": true, "INTERSECT": true, "WITH": true, "RECURSIVE": true, + "RETURNING": true, "CONFLICT": true, "DO": true, "NOTHING": true, "UPSERT": true, +} + +// validateIdentifier validates that an identifier is safe for use in SQL +func (p *DuckDBPlugin) validateIdentifier(identifier string) error { + // Check length limits (DuckDB supports up to 64 characters for identifiers) + if len(identifier) == 0 { + return fmt.Errorf("identifier cannot be empty") + } + if len(identifier) > 64 { + return fmt.Errorf("identifier exceeds maximum length of 64 characters") + } + + // Check for null bytes and other dangerous control characters + if strings.Contains(identifier, "\x00") { + return fmt.Errorf("identifier contains null byte") + } + + // Check for dangerous characters that could enable injection + for _, char := range identifier { + if char < 32 && char != 9 && char != 10 && char != 13 { // Allow tab, newline, carriage return + return fmt.Errorf("identifier contains invalid control character: %U", char) + } + } + + // Check for SQL injection patterns + suspiciousPatterns := []string{ + "--", "/*", "*/", ";", "xp_", "sp_", "@@", "EXEC", "EXECUTE", + "SCRIPT", "JAVASCRIPT", "VBSCRIPT", "ONLOAD", "ONERROR", + } + upperIdentifier := strings.ToUpper(identifier) + for _, pattern := range suspiciousPatterns { + if strings.Contains(upperIdentifier, pattern) { + return fmt.Errorf("identifier contains suspicious pattern: %s", pattern) + } + } + + return nil +} + +// EscapeSpecificIdentifier properly escapes and validates identifiers for DuckDB +func (p *DuckDBPlugin) EscapeSpecificIdentifier(identifier string) string { + // First validate the identifier for security + if err := p.validateIdentifier(identifier); err != nil { + // If validation fails, create a safe fallback identifier + // This prevents injection while maintaining functionality + safeIdentifier := p.createSafeIdentifier(identifier) + return safeIdentifier + } + + // Check if identifier needs quoting (contains special chars or is reserved) + needsQuoting := p.identifierNeedsQuoting(identifier) + + if needsQuoting { + // Escape double quotes by doubling them, then wrap in quotes + escaped := strings.Replace(identifier, "\"", "\"\"", -1) + return "\"" + escaped + "\"" + } + + // Return identifier as-is if it doesn't need quoting + return identifier +} + +// identifierNeedsQuoting determines if an identifier needs to be quoted +func (p *DuckDBPlugin) identifierNeedsQuoting(identifier string) bool { + // Check if it's a reserved keyword + if duckdbReservedKeywords[strings.ToUpper(identifier)] { + return true + } + + // Check if it starts with a number + if len(identifier) > 0 && unicode.IsDigit(rune(identifier[0])) { + return true + } + + // Check if it contains special characters that require quoting + for _, char := range identifier { + if !unicode.IsLetter(char) && !unicode.IsDigit(char) && char != '_' { + return true + } + } + + return false +} + +// createSafeIdentifier creates a safe fallback identifier when validation fails +func (p *DuckDBPlugin) createSafeIdentifier(original string) string { + // Create a safe identifier by removing dangerous characters + reg := regexp.MustCompile(`[^a-zA-Z0-9_]`) + safe := reg.ReplaceAllString(original, "_") + + // Ensure it doesn't start with a number + if len(safe) > 0 && unicode.IsDigit(rune(safe[0])) { + safe = "col_" + safe + } + + // Ensure it's not empty + if safe == "" { + safe = "safe_identifier" + } + + // Ensure it's not too long + if len(safe) > 64 { + safe = safe[:64] + } + + // Ensure it's not a reserved keyword + if duckdbReservedKeywords[strings.ToUpper(safe)] { + safe = safe + "_col" + } + + // Always quote safe identifiers since they may have been modified + return "\"" + safe + "\"" +} + +// GetGraphQueryDB returns the database connection for graph queries +func (p *DuckDBPlugin) GetGraphQueryDB(db *gorm.DB, schema string) *gorm.DB { + // For DuckDB, we don't need schema-specific handling since it uses 'main' schema + return db +} + +// GetCreateTableQuery generates a CREATE TABLE statement for DuckDB +func (p *DuckDBPlugin) GetCreateTableQuery(schema string, storageUnit string, columns []engine.Record) string { + var columnDefs []string + + for _, column := range columns { + // Use secure identifier escaping (handles quoting automatically) + columnName := p.EscapeSpecificIdentifier(column.Key) + columnType := column.Value + + // Validate and normalize column type for DuckDB + normalizedType := p.normalizeColumnType(columnType) + + // EscapeSpecificIdentifier now handles quoting, so don't add extra quotes + columnDefs = append(columnDefs, fmt.Sprintf("%s %s", columnName, normalizedType)) + } + + // Use secure identifier escaping for table name (handles quoting automatically) + tableName := p.EscapeSpecificIdentifier(storageUnit) + return fmt.Sprintf("CREATE TABLE %s (%s)", tableName, strings.Join(columnDefs, ", ")) +} + +// normalizeColumnType ensures the column type is valid for DuckDB and prevents injection +func (p *DuckDBPlugin) normalizeColumnType(columnType string) string { + // First sanitize the input to prevent SQL injection through column types + columnType = strings.TrimSpace(columnType) + + // Enhanced security validation with comprehensive pattern detection + if err := p.validateColumnType(columnType); err != nil { + // If validation fails, return safe default and log the attempt + return "VARCHAR" + } + + upperType := strings.ToUpper(columnType) + + // Map common SQL types to DuckDB equivalents (simple types first) + switch upperType { + case "INT", "INT4": + return "INTEGER" + case "INT8": + return "BIGINT" + case "INT2": + return "SMALLINT" + case "INT1": + return "TINYINT" + case "FLOAT4": + return "REAL" + case "FLOAT8": + return "DOUBLE" + case "BOOL": + return "BOOLEAN" + case "STRING": + return "VARCHAR" + default: + // Check if it's a valid DuckDB type with parameters (e.g., VARCHAR(255)) + validatedType := p.parseAndValidateParameterizedType(upperType) + if validatedType != "" { + return validatedType + } + + // Check simple types without parameters + if p.isValidSimpleDuckDBType(upperType) { + return upperType + } + + // Default to VARCHAR for unknown types + return "VARCHAR" + } +} + +// validateColumnType performs comprehensive security validation on column types +func (p *DuckDBPlugin) validateColumnType(columnType string) error { + // Check length limits + if len(columnType) == 0 { + return fmt.Errorf("column type cannot be empty") + } + if len(columnType) > 100 { + return fmt.Errorf("column type exceeds maximum length") + } + + // Check for null bytes and dangerous control characters + if strings.Contains(columnType, "\x00") { + return fmt.Errorf("column type contains null byte") + } + + // Check for dangerous characters that could enable injection + for _, char := range columnType { + if char < 32 && char != 9 && char != 10 && char != 13 { + return fmt.Errorf("column type contains invalid control character") + } + } + + // Enhanced suspicious pattern detection + suspiciousPatterns := []string{ + "--", "/*", "*/", ";", "DROP", "DELETE", "INSERT", "UPDATE", "CREATE", "ALTER", + "EXEC", "EXECUTE", "SCRIPT", "XP_", "SP_", "@@", "UNION", "SELECT", "FROM", + "WHERE", "ORDER", "GROUP", "HAVING", "DECLARE", "SET", "GRANT", "REVOKE", + "TRUNCATE", "MERGE", "CALL", "RETURN", "THROW", "TRY", "CATCH", "BEGIN", + "END", "IF", "ELSE", "WHILE", "FOR", "CURSOR", "PROCEDURE", "FUNCTION", + "TRIGGER", "VIEW", "SCHEMA", "DATABASE", "BACKUP", "RESTORE", "SHUTDOWN", + } + + upperType := strings.ToUpper(columnType) + for _, pattern := range suspiciousPatterns { + if strings.Contains(upperType, pattern) { + return fmt.Errorf("column type contains suspicious pattern: %s", pattern) + } + } + + // Validate that column type only contains strictly allowed characters + // More restrictive: only letters, numbers, parentheses, spaces, comma, underscore + allowedChars := regexp.MustCompile(`^[a-zA-Z0-9()\s,_]+$`) + if !allowedChars.MatchString(columnType) { + return fmt.Errorf("column type contains invalid characters") + } + + return nil +} + +// parseAndValidateParameterizedType handles types with parameters like VARCHAR(255) or DECIMAL(10,2) +func (p *DuckDBPlugin) parseAndValidateParameterizedType(typeStr string) string { + // Match pattern: TYPE(param1) or TYPE(param1,param2) + paramPattern := regexp.MustCompile(`^([A-Z]+)\s*\(\s*(\d+)(?:\s*,\s*(\d+))?\s*\)$`) + matches := paramPattern.FindStringSubmatch(typeStr) + + if len(matches) == 0 { + return "" // Not a parameterized type + } + + baseType := matches[1] + param1 := matches[2] + param2 := "" + if len(matches) > 3 && matches[3] != "" { + param2 = matches[3] + } + + // Validate base type is allowed + if !p.isValidSimpleDuckDBType(baseType) { + return "" + } + + // Validate parameters are within reasonable bounds + if !p.validateTypeParameters(baseType, param1, param2) { + return "" + } + + // Reconstruct the validated type + if param2 != "" { + return fmt.Sprintf("%s(%s,%s)", baseType, param1, param2) + } else { + return fmt.Sprintf("%s(%s)", baseType, param1) + } +} + +// validateTypeParameters ensures type parameters are within valid ranges +func (p *DuckDBPlugin) validateTypeParameters(baseType, param1, param2 string) bool { + // Convert parameters to integers for validation + p1, err1 := fmt.Atoi(param1) + if err1 != nil { + return false + } + + var p2 int + var err2 error + if param2 != "" { + p2, err2 = fmt.Atoi(param2) + if err2 != nil { + return false + } + } + + // Validate parameter ranges based on type + switch baseType { + case "VARCHAR", "CHAR", "TEXT": + // Length parameter should be reasonable (1 to 65535) + return p1 > 0 && p1 <= 65535 && param2 == "" + case "DECIMAL", "NUMERIC": + // Precision and scale parameters + if param2 == "" { + // Only precision specified + return p1 > 0 && p1 <= 38 + } else { + // Both precision and scale + return p1 > 0 && p1 <= 38 && p2 >= 0 && p2 <= p1 + } + case "FLOAT", "DOUBLE", "REAL": + // Precision parameter for float types + return p1 > 0 && p1 <= 53 && param2 == "" + default: + // For other types, be conservative and don't allow parameters + return false + } +} + +// isValidSimpleDuckDBType checks if a type string is a valid simple DuckDB type (no parameters) +func (p *DuckDBPlugin) isValidSimpleDuckDBType(typeStr string) bool { + // Comprehensive list of valid DuckDB base types + validTypes := map[string]bool{ + "BOOLEAN": true, "TINYINT": true, "SMALLINT": true, "INTEGER": true, "BIGINT": true, + "HUGEINT": true, "UTINYINT": true, "USMALLINT": true, "UINTEGER": true, "UBIGINT": true, + "REAL": true, "DOUBLE": true, "DECIMAL": true, "NUMERIC": true, "VARCHAR": true, + "CHAR": true, "TEXT": true, "STRING": true, "BLOB": true, "BYTEA": true, + "DATE": true, "TIME": true, "TIMESTAMP": true, "TIMESTAMPTZ": true, "INTERVAL": true, + "UUID": true, "JSON": true, "ARRAY": true, "LIST": true, "STRUCT": true, "MAP": true, + "UNION": true, "ENUM": true, + } + + return validTypes[typeStr] +} + diff --git a/core/src/plugins/gorm/db.go b/core/src/plugins/gorm/db.go index 1c5431d..4ae55de 100644 --- a/core/src/plugins/gorm/db.go +++ b/core/src/plugins/gorm/db.go @@ -37,6 +37,12 @@ const ( readOnlyKey = "Readonly" debugKey = "Debug" connectionTimeoutKey = "Connection Timeout" + + // DuckDB specific connection options + duckdbAccessModeKey = "Access Mode" // read_only, read_write + duckdbThreadsKey = "Threads" // number of threads for parallel execution + duckdbMaxMemoryKey = "Max Memory" // maximum memory usage (e.g., "1GB") + duckdbTempDirectoryKey = "Temp Directory" // temporary directory for intermediate results ) // DefaultDatabasePorts maps database systems to their standard default ports @@ -45,6 +51,7 @@ var defaultDatabasePorts = map[engine.DatabaseType]string{ engine.DatabaseType_MariaDB: "3306", engine.DatabaseType_Postgres: "5432", engine.DatabaseType_Sqlite3: "0", // SQLite is file-based, no port + engine.DatabaseType_DuckDB: "0", // DuckDB is file-based, no port engine.DatabaseType_ClickHouse: "9000", // TCP port (HTTP port is 8123) engine.DatabaseType_MongoDB: "27017", engine.DatabaseType_ElasticSearch: "9200", // HTTP port (Transport port is 9300) @@ -72,6 +79,12 @@ type ConnectionInput struct { ConnectionTimeout int + //duckdb + DuckDBAccessMode string + DuckDBThreads string + DuckDBMaxMemory string + DuckDBTempDirectory string + ExtraOptions map[string]string `validate:"omitnil"` } @@ -111,6 +124,12 @@ func (p *GormPlugin) ParseConnectionConfig(config *engine.PluginConfig) (*Connec return nil, err } + //duckdb specific + duckdbAccessMode := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbAccessModeKey, "read_write") + duckdbThreads := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbThreadsKey, "") + duckdbMaxMemory := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbMaxMemoryKey, "") + duckdbTempDirectory := common.GetRecordValueOrDefault(config.Credentials.Advanced, duckdbTempDirectoryKey, "") + input := &ConnectionInput{ Username: url.PathEscape(config.Credentials.Username), Password: url.PathEscape(config.Credentials.Password), @@ -125,6 +144,10 @@ func (p *GormPlugin) ParseConnectionConfig(config *engine.PluginConfig) (*Connec ReadOnly: readOnly, Debug: debug, ConnectionTimeout: connectionTimeout, + DuckDBAccessMode: duckdbAccessMode, + DuckDBThreads: duckdbThreads, + DuckDBMaxMemory: duckdbMaxMemory, + DuckDBTempDirectory: duckdbTempDirectory, } // if this config is a pre-configured profile, then allow reading of additional params @@ -132,7 +155,7 @@ func (p *GormPlugin) ParseConnectionConfig(config *engine.PluginConfig) (*Connec params := make(map[string]string) for _, record := range config.Credentials.Advanced { switch record.Key { - case portKey, parseTimeKey, locKey, allowClearTextPasswordsKey, sslModeKey, httpProtocolKey, readOnlyKey, debugKey, connectionTimeoutKey: + case portKey, parseTimeKey, locKey, allowClearTextPasswordsKey, sslModeKey, httpProtocolKey, readOnlyKey, debugKey, connectionTimeoutKey, duckdbAccessModeKey, duckdbThreadsKey, duckdbMaxMemoryKey, duckdbTempDirectoryKey: continue default: params[record.Key] = url.QueryEscape(record.Value) // todo: this may break for postgres diff --git a/core/src/src.go b/core/src/src.go index 713540f..c6114c2 100644 --- a/core/src/src.go +++ b/core/src/src.go @@ -19,6 +19,7 @@ package src import ( "fmt" "github.com/clidey/whodb/core/src/plugins/clickhouse" + "github.com/clidey/whodb/core/src/plugins/duckdb" "github.com/clidey/whodb/core/src/plugins/elasticsearch" "github.com/clidey/whodb/core/src/plugins/mongodb" "github.com/clidey/whodb/core/src/plugins/mysql" @@ -38,6 +39,7 @@ func InitializeEngine() *engine.Engine { MainEngine.RegistryPlugin(mysql.NewMySQLPlugin()) MainEngine.RegistryPlugin(mysql.NewMyMariaDBPlugin()) MainEngine.RegistryPlugin(sqlite3.NewSqlite3Plugin()) + MainEngine.RegistryPlugin(duckdb.NewDuckDBPlugin()) MainEngine.RegistryPlugin(mongodb.NewMongoDBPlugin()) MainEngine.RegistryPlugin(redis.NewRedisPlugin()) MainEngine.RegistryPlugin(elasticsearch.NewElasticSearchPlugin()) diff --git a/frontend/src/components/icons.tsx b/frontend/src/components/icons.tsx index d8c3fe7..5729248 100644 --- a/frontend/src/components/icons.tsx +++ b/frontend/src/components/icons.tsx @@ -193,6 +193,11 @@ export const Icons = { , + DuckDB: + + + + , Anthropic: Anthropic , OpenAICompatible: diff --git a/frontend/src/generated/graphql.tsx b/frontend/src/generated/graphql.tsx index a5c96ec..3747932 100644 --- a/frontend/src/generated/graphql.tsx +++ b/frontend/src/generated/graphql.tsx @@ -68,6 +68,7 @@ export type Column = { export enum DatabaseType { ClickHouse = 'ClickHouse', + DuckDB = 'DuckDB', ElasticSearch = 'ElasticSearch', MariaDb = 'MariaDB', MongoDb = 'MongoDB', diff --git a/frontend/src/pages/auth/login.tsx b/frontend/src/pages/auth/login.tsx index a1fc2f8..df3b0c8 100644 --- a/frontend/src/pages/auth/login.tsx +++ b/frontend/src/pages/auth/login.tsx @@ -88,6 +88,17 @@ export const databaseTypeDropdownItems: IDropdownItem>[] "Debug": "disable" } }, + { + id: DatabaseType.DuckDB, + label: "DuckDB", + icon: Icons.Logos.DuckDB, + extra: { + "Access Mode": "read_write", + "Threads": "4", + "Max Memory": "75%", + "Temp Directory": "" + } + }, ] export const LoginPage: FC = () => { @@ -118,7 +129,7 @@ export const LoginPage: FC = () => { const handleSubmit = useCallback(() => { if (([DatabaseType.MySql, DatabaseType.Postgres].includes(databaseType.id as DatabaseType) && (hostName.length === 0 || database.length === 0 || username.length === 0)) - || (databaseType.id === DatabaseType.Sqlite3 && database.length === 0) + || ((databaseType.id === DatabaseType.Sqlite3 || databaseType.id === DatabaseType.DuckDB) && database.length === 0) || ((databaseType.id === DatabaseType.MongoDb || databaseType.id === DatabaseType.Redis) && (hostName.length === 0))) { return setError("All fields are required"); } @@ -199,6 +210,13 @@ export const LoginPage: FC = () => { }, }); } + if (item.id === DatabaseType.DuckDB) { + getDatabases({ + variables: { + type: DatabaseType.DuckDB, + }, + }); + } setHostName(""); setUsername(""); setPassword(""); @@ -303,13 +321,13 @@ export const LoginPage: FC = () => { }, [databaseType.id]); const fields = useMemo(() => { - if (databaseType.id === DatabaseType.Sqlite3) { + if (databaseType.id === DatabaseType.Sqlite3 || databaseType.id === DatabaseType.DuckDB) { return <> ({ id: database, label: database, icon: Icons.Database, - })) ?? []} loading={databasesLoading} noItemsLabel="Not available. Mount SQLite file in /db/" fullWidth={true} value={{ + })) ?? []} loading={databasesLoading} noItemsLabel="Not available. Mount database file in /db/" fullWidth={true} value={{ id: database, label: database, icon: Icons.Database,