Commit 33e833a

Merge remote-tracking branch 'origin/main' into reduce-errors
2 parents: 5fbb3ff + 6ae18df

File tree

21 files changed: +6634 -5373 lines

docs/package/README.md

Lines changed: 20 additions & 2 deletions

````diff
@@ -13,8 +13,26 @@ comment on existing issues to extend them to your needs or to add solution ideas
 ## Usage
 
 pydiverse.pipedag can either be installed via pypi with `pip install pydiverse-pipedag pydot` or via
-conda-forge with `conda install pydiverse-pipedag pydot -c conda-forge`. If you don't use duckdb for
-testing, you can obmit it here. However, it is needed to run the following example.
+conda-forge with `conda install pydiverse-pipedag pydot -c conda-forge`. Our recommendation would be
+to use [pixi](https://pixi.sh/latest/) which is also based on conda-forge:
+
+```bash
+mkdir my_project
+pixi init
+pixi add pydiverse-pipedag pydot
+```
+
+With pixi, you run python like this:
+
+```bash
+pixi run python -c 'import pydiverse.pipedag'
+```
+
+or this:
+
+```bash
+pixi run python my_script.py
+```
 
 ## Example
 
````

docs/source/changelog.md

Lines changed: 5 additions & 0 deletions

```diff
@@ -1,5 +1,10 @@
 # Changelog
 
+## 0.12.0 (2025-10-07)
+- Support pydiverse.common 0.4.1, pydiverse.transform 0.6.0, pydiverse.colspec 0.3.0.
+- Structlog logger initialization changed to stdlib logger factory to support dynamic loglevel filter in tests.
+- Switch cache misses from warning to info log level.
+
 ## 0.11.0 (2025-10-01)
 - Support View as task output to allow multi-parquet fusion in ParquetTableStore or basic column selection/renaming
   outside consumer task.
```
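
The structlog entry is terse, so here is a minimal sketch of the idea behind it, assuming a plain `structlog.configure` call (this is not pipedag's actual initialization code): routing structlog output through stdlib loggers makes the effective log level adjustable at runtime, which is what lets tests filter dynamically.

```python
# Minimal sketch, not pipedag's actual setup: a stdlib logger factory lets
# tests change the effective log level at runtime via the logging module.
import logging

import structlog

structlog.configure(
    processors=[
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer(),
    ],
    logger_factory=structlog.stdlib.LoggerFactory(),  # stdlib-backed loggers
    wrapper_class=structlog.stdlib.BoundLogger,
)
logging.basicConfig(level=logging.INFO)

log = structlog.get_logger("pipedag.demo")
log.info("cache miss")  # cache misses are now emitted at info level

# a test can tighten filtering on the fly through the stdlib logger:
logging.getLogger("pipedag.demo").setLevel(logging.WARNING)
log.info("suppressed by the stdlib level filter")
```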

docs/source/examples/multi_instance_pipeline.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -95,7 +95,7 @@ technical_setups:
 
     args:
       create_database_if_not_exists: True
-      # print select statements before being encapsualted in materialize expressions and tables before writing to
+      # print select statements before being encapsulated in materialize expressions and tables before writing to
       # database
       print_materialize: true
       # print final sql statements
```

docs/source/reference/config.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -689,7 +689,7 @@ technical_setups:
 
     args:
       create_database_if_not_exists: True
-      # print select statements before being encapsualted in materialize expressions and tables before writing to
+      # print select statements before being encapsulated in materialize expressions and tables before writing to
       # database
       print_materialize: true
       # print final sql statements
```

pixi.lock

Lines changed: 6544 additions & 5319 deletions
Some generated files are not rendered by default.

pixi.toml

Lines changed: 11 additions & 11 deletions

```diff
@@ -5,7 +5,7 @@ platforms = ["linux-64", "osx-64", "osx-arm64", "win-64"] # "linux-aarch64"
 
 [dependencies]
 python = ">=3.10.18,<3.14"
-pydiverse-common = ">=0.3.15,<0.4"
+pydiverse-common = ">=0.4.1,<0.5"
 typing-extensions = ">=4.14.1,<5"
 networkx = ">=3.4,<4"
 attrs = ">=25.3.0,<26"
@@ -15,17 +15,17 @@ msgpack-python = ">=1.1.1,<2"
 packaging = ">=24.2,<26"
 python-box = ">=7.3.2,<8"
 pyyaml = ">=6.0.2,<7"
-cryptography = ">=45.0.5,<46"
+cryptography = ">=45.0.5,<47"
 click = ">=8.1.8,<9"
 pyparsing = ">=3.2.3,<4"
 sqlalchemy = ">=1.4.54,<3"
 pandas = ">=1.5.3,<3"
-pyarrow = ">=20.0.0,<21"
-duckdb = ">=0.10.3,<2"
+pyarrow = ">=20.0.0,<22"
+duckdb = ">=0.10.3,<1.4"
 duckdb-engine = ">=0.15.0,<0.18"
 polars = ">=1.2.1,<2"
 fsspec = ">=2025.5.1,<2026"
-universal_pathlib = ">=0.2.6,<0.3"
+universal_pathlib = ">=0.2.6,<0.4"
 
 
 [target.osx-arm64.dependencies]
@@ -54,12 +54,11 @@ kazoo = ">=2.8.0"
 dask = ">=2022.1.0"
 
 [feature.prefect2.dependencies]
-prefect = ">=2.13.5,<3a0"
+prefect = ">=2.14.2,<3a0"
 pydantic = ">=1.10.9"
 
 [feature.prefect3.dependencies]
-prefect = ">=3.4.6,<4a0"
-whenever = ">=0.8.5,<1.0"
+prefect = ">=3.4.22,<4a0"
 
 [feature.snowflake.dependencies]
 snowflake-sqlalchemy = ">=1.6.1"
@@ -85,13 +84,13 @@ pytsql = ">=1.4.0,<2"
 bcpandas = ">=2.6.3,<3"
 
 [feature.pdtransform-new.dependencies]
-pydiverse-transform = ">=0.5.6,<0.6"
+pydiverse-transform = ">=0.6.0,<0.7"
 
 [feature.pdtransform-old.dependencies]
 pydiverse-transform = "<0.2"
 
 [feature.colspec.dependencies]
-pydiverse-colspec = ">=0.2.8,<0.3"
+pydiverse-colspec = ">=0.3.0,<0.4"
 
 [feature.dataframely.dependencies]
 dataframely = ">=1.2.1,<2"
@@ -101,7 +100,8 @@ dataframely = ">=1.2.1,<2"
 platforms = ["linux-64", "osx-arm64", "win-64"]
 
 [feature.ibm-db.dependencies]
-ibm_db = ">=3.2.5, <3.2.7" # somehoew there is a version conflict for 3.2.7 that fails empty queries
+#Current pixi install keeps ibm_db 3.2.6 which works.
+ibm_db = ">=3.2.5, <3.2.7" # somehow there is a version conflict for 3.2.7 that fails empty queries
 ibm_db_sa = ">=0.3.8"
 
 # S3
```

pyproject.toml

Lines changed: 6 additions & 6 deletions

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "pydiverse-pipedag"
-version = "0.11.0"
+version = "0.12.0"
 description = "A pipeline orchestration library executing tasks within one python session. It takes care of SQL table (de)materialization, caching and cache invalidation. Blob storage is supported as well for example for storing model files."
 authors = [
     { name = "QuantCo, Inc." },
@@ -30,7 +30,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "pydiverse-common >=0.3.15,<0.4",
+    "pydiverse-common >=0.4.1,<0.5",
     "typing-extensions >=4.14.1,<5",
     "networkx >=3.4,<4",
     "attrs >=25.3.0,<26",
@@ -40,17 +40,17 @@ dependencies = [
     "packaging >=24.2,<26",
     "python-box >=7.3.2,<8",
     "pyyaml >=6.0.2,<7",
-    "cryptography >=45.0.5,<46",
+    "cryptography >=45.0.5,<47",
     "click >=8.1.8,<9",
     "pyparsing >=3.2.3,<4",
     "sqlalchemy >=1.4.54,<3",
     "pandas >=1.5.3,<3",
-    "pyarrow >=20.0.0,<21",
-    "duckdb >=0.10.3,<2", # py311pdsa1 requires < 1.3.0
+    "pyarrow >=20.0.0,<22",
+    "duckdb >=0.10.3,<1.4", # 1.4.0 has bug with sqlalchemy; py311pdsa1 requires < 1.3.0
     "duckdb-engine >=0.15.0,<0.18",
     "polars >=1.2.1,<2", # py311pdsa1all requires < 1.30.0
     "fsspec >=2025.5.1,<2026",
-    "universal_pathlib >=0.2.6,<0.3",
+    "universal_pathlib >=0.2.6,<0.4",
 ]
 
 [tool.hatch.build.targets.wheel]
```
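
As a side note, the `packaging` library pinned in the dependency list above can verify whether an installed version falls inside the new bounds; a small illustrative check (not project code) for the tightened duckdb constraint:

```python
# Illustrative only: test versions against the tightened duckdb bound.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

duckdb_spec = SpecifierSet(">=0.10.3,<1.4")
print(Version("1.3.2") in duckdb_spec)  # True: still within bounds
print(Version("1.4.0") in duckdb_spec)  # False: excluded because of the sqlalchemy bug
```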

pytest.ini

Lines changed: 1 addition & 1 deletion

```diff
@@ -16,7 +16,7 @@ markers =
     dask: a test that requires dask [DaskEngine]
     prefect: a test that requires prefect [PrefectEngine]
 
-    lock_tests: tests exercising lock manager implementations (enabled by default, can be disabled with --no-lock-tests)
+    lock_tests: tests exercising lock manager implementations (enabled by default, can be disabled with --no-lock_tests)
 
     instances: marker used to run an test with different instances
     skip_instances: fixture used to skip running test for a list of instances
```
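
The `--no-lock_tests` flag itself lives in the project's test setup; as a hedged sketch of how such a marker/flag pair is commonly wired up (hypothetical, not necessarily this repo's conftest.py):

```python
# Hypothetical conftest.py sketch: skip tests marked lock_tests when the
# --no-lock_tests flag is given. Not necessarily the repo's implementation.
import pytest


def pytest_addoption(parser):
    parser.addoption(
        "--no-lock_tests",
        action="store_true",
        default=False,
        help="disable tests marked with lock_tests",
    )


def pytest_collection_modifyitems(config, items):
    if not config.getoption("--no-lock_tests"):
        return
    skip = pytest.mark.skip(reason="disabled via --no-lock_tests")
    for item in items:
        if "lock_tests" in item.keywords:
            item.add_marker(skip)
```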

src/pydiverse/pipedag/backend/table/cache/parquet.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -206,7 +206,7 @@ def materialize(
 
         if isinstance(df, pl.LazyFrame):
             df = df.collect()
-        df.write_parquet(path)
+        df.write_parquet(str(path))
         # intentionally don't apply annotation checks because they might also be done
         # within polars table hook of actual table store
 
@@ -222,7 +222,7 @@ def _execute_query(
     ) -> pl.DataFrame:
         _ = as_type
         path = store.get_table_path(table, ".parquet")
-        df = pl.read_parquet(path, n_rows=limit)
+        df = pl.read_parquet(str(path), n_rows=limit)
         if issubclass(as_type, pl.LazyFrame):
             return df.lazy()
         return df
```
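
Both hunks apply the same fix: polars receives `str(path)` instead of the path object. A self-contained sketch of the pattern (the `UPath` and file name here are hypothetical, assuming `universal_pathlib` is installed):

```python
# Sketch of the str() conversion pattern; path and data are hypothetical.
import polars as pl
from upath import UPath

path = UPath("table.parquet")
df = pl.DataFrame({"a": [1, 2, 3]})

df.write_parquet(str(path))  # polars 1.33.1 no longer accepts UPath directly
df2 = pl.read_parquet(str(path), n_rows=2)  # same conversion on the read side
print(df2)
```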

src/pydiverse/pipedag/backend/table/sql/dialects/duckdb_parquet.py

Lines changed: 10 additions & 5 deletions

```diff
@@ -911,7 +911,9 @@ def materialize(
         )
 
         df = table.obj
-        df.to_parquet(file_path, index=False, storage_options=store.get_storage_options("fsspec", file_path.protocol))
+        df.to_parquet(
+            str(file_path), index=False, storage_options=store.get_storage_options("fsspec", file_path.protocol)
+        )
         store.execute(CreateViewAsSelect(table.name, schema, store._read_parquet_query(file_path)))
         store.metadata_track_view(table.name, schema.get(), file_path.as_uri(), "parquet")
 
@@ -968,7 +970,7 @@ def retrieve(
         if limit is not None:
             df = ds.dataset(pyarrow_path, filesystem=pyarrow_fs).scanner().head(limit).to_pandas()
         else:
-            df = pd.read_parquet(path, storage_options=store.get_storage_options("fsspec", path.protocol))
+            df = pd.read_parquet(str(path), storage_options=store.get_storage_options("fsspec", path.protocol))
         import pyarrow.parquet
 
         # attention: with categorical columns, it might be necessary to fuse dictionaries of all parquet files
@@ -1050,9 +1052,10 @@ def _execute_materialize_polars(table, store, stage_name):
                 "Storing polars tables with custom storage options is not supported for polars < 1.3.0. "
                 f"Current version is {pl.__version__}: {options}"
             )
-        df.write_parquet(file_path, storage_options=options)
+        # at some point polars supported UPath, but 1.33.1 does not
+        df.write_parquet(str(file_path), storage_options=options)
     else:
-        df.write_parquet(file_path)
+        df.write_parquet(str(file_path))
     store.execute(CreateViewAsSelect(table.name, schema, store._read_parquet_query(file_path)))
     store.metadata_track_view(table.name, schema.get(), file_path.as_uri(), "parquet")
 
@@ -1071,9 +1074,11 @@ def _execute_query(
             if isinstance(view.src, Iterable):
                 file_paths = [store.get_table_path(tbl) for tbl in view.src]
                 protocol = file_paths[0].protocol
+                file_paths = [str(path) for path in file_paths]
             else:
                 file_paths = store.get_table_path(view.src)
                 protocol = file_paths.protocol
+                file_paths = str(file_paths)
             lf = pl.scan_parquet(
                 file_paths, n_rows=limit, storage_options=store.get_storage_options("polars", protocol)
             )
@@ -1090,7 +1095,7 @@ def _execute_query(
         else:
             file_path = store.get_table_path(table)
             df = pl.read_parquet(
-                file_path, n_rows=limit, storage_options=store.get_storage_options("polars", file_path.protocol)
+                str(file_path), n_rows=limit, storage_options=store.get_storage_options("polars", file_path.protocol)
             )
         return df
 
```
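
The same `str()` conversion runs through every hunk of this file, for pandas as well as polars, including the list-of-paths case used for multi-parquet View sources. A rough self-contained sketch (hypothetical paths, not the store's actual call sites):

```python
# Hypothetical paths; shows the str() pattern for pandas and polars alike.
import pandas as pd
import polars as pl
from upath import UPath

file_paths = [UPath("tbl_part1.parquet"), UPath("tbl_part2.parquet")]

pd.DataFrame({"a": [1]}).to_parquet(str(file_paths[0]), index=False)
pd.DataFrame({"a": [2]}).to_parquet(str(file_paths[1]), index=False)

# pl.scan_parquet accepts a list of path strings, which is what enables the
# multi-parquet fusion for View sources mentioned in the changelog.
lf = pl.scan_parquet([str(p) for p in file_paths], n_rows=2)
print(lf.collect())
```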
