Skip to content

Commit bdb2c4e

Browse files
committed
Data normalization upon load to DuckDB + cleanup templates
Clean up templates
1 parent 35bdbf3 commit bdb2c4e

File tree

133 files changed

+7634
-4580
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

133 files changed

+7634
-4580
lines changed

frontend/src/components/widgets/MarkdownRendererWidget.jsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ const MarkdownRendererWidget = ({ markdown, value, error, className }) => {
4646

4747
return (
4848
<Card className={cn('overflow-hidden', className)}>
49-
<CardContent className="prose max-w-none prose-pre:p-0 prose-pre:bg-transparent prose-pre:m-0 prose-code:bg-muted prose-code:px-1.5 prose-code:py-0.5 prose-code:rounded prose-code:text-foreground prose-code:before:content-none prose-code:after:content-none">
49+
<CardContent className="prose max-w-none prose-pre:p-0 prose-pre:bg-transparent prose-pre:m-0 prose-code:bg-muted prose-code:px-1.5 prose-code:py-0.5 prose-code:rounded prose-code:text-foreground prose-code:before:content-none prose-code:after:content-none prose-headings:mt-2 prose-headings:mb-2 prose-p:my-0 prose-p:mb-0.5">
5050
<ReactMarkdown
5151
remarkPlugins={[remarkGfm, remarkSlug]}
5252
components={{

preswald/engine/managers/data.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ def load_json_source(config: dict[str, Any]) -> pd.DataFrame:
3838
f"Invalid record_path '{record_path}' for JSON file '{path}': {e}"
3939
) from e
4040

41-
4241
# Normalize or convert data if "flatten"
4342
try:
4443
if flatten:
@@ -51,7 +50,6 @@ def load_json_source(config: dict[str, Any]) -> pd.DataFrame:
5150
) from e
5251

5352

54-
5553
# Database Configs ############################################################
5654
@dataclass
5755
class ClickhouseConfig:
@@ -118,6 +116,7 @@ class S3CSVConfig:
118116
s3_use_ssl: bool = False
119117
s3_url_style: str = "path"
120118

119+
121120
class DataSource:
122121
"""Base class for all data sources"""
123122

@@ -182,7 +181,14 @@ def __init__(
182181
self._table_name = f"csv_{name}_{uuid.uuid4().hex[:8]}"
183182
self._duckdb.execute(f"""
184183
CREATE TABLE {self._table_name} AS
185-
SELECT * FROM read_csv_auto('{self.path}')
184+
SELECT * FROM read_csv_auto('{self.path}',
185+
header=true,
186+
auto_detect=true,
187+
ignore_errors=true,
188+
normalize_names=true,
189+
sample_size=-1,
190+
all_varchar=true
191+
)
186192
""")
187193

188194
def query(self, sql: str) -> pd.DataFrame:

preswald/templates/api-docs/hello.py.template

-13
This file was deleted.

preswald/templates/api-docs/preswald.toml.template

-20
This file was deleted.

preswald/templates/api-docs/sample.csv.template

-13
This file was deleted.

preswald/templates/bacteria-evolution/hello.py.template

-13
This file was deleted.

preswald/templates/bacteria-evolution/preswald.toml.template

-20
This file was deleted.

preswald/templates/bacteria-evolution/sample.csv.template

-151
This file was deleted.

0 commit comments

Comments
 (0)