From e5b7191ce3ed3ccb3c065df6fc0f4e25eacd2f02 Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 23 Jul 2025 08:09:10 -0700 Subject: [PATCH 1/8] chore: migrate to DF 49.0.0 --- native/Cargo.lock | 573 ++++++++++++------ native/Cargo.toml | 10 +- native/core/src/execution/operators/filter.rs | 23 +- native/core/src/execution/planner.rs | 12 +- .../src/execution/shuffle/shuffle_writer.rs | 4 +- 5 files changed, 421 insertions(+), 201 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index b6e4c0e0f7..2719fe1654 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -244,6 +244,7 @@ dependencies = [ "arrow-schema", "flatbuffers", "lz4_flex", + "zstd", ] [[package]] @@ -1263,9 +1264,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6cb8c2c81eada072059983657d6c9caf3fddefc43b4a65551d243253254a96" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "arrow-ipc", @@ -1275,23 +1275,23 @@ dependencies = [ "chrono", "datafusion-catalog", "datafusion-catalog-listing", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", "datafusion-datasource", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-expr-common 49.0.0", + "datafusion-functions 49.0.0", + "datafusion-functions-aggregate 49.0.0", + "datafusion-functions-nested 49.0.0", "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-session", @@ -1313,19 +1313,18 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7be8d1b627843af62e447396db08fe1372d882c0eb8d0ea655fd1fbc33120ee" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-physical-expr 49.0.0", "datafusion-physical-plan", "datafusion-session", "datafusion-sql", @@ -1339,19 +1338,18 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ab16c5ae43f65ee525fc493ceffbc41f40dee38b01f643dfcfc12959e92038" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1376,7 +1374,7 @@ dependencies = [ "datafusion-comet-objectstore-hdfs", "datafusion-comet-proto", "datafusion-comet-spark-expr", - "datafusion-functions-nested", + "datafusion-functions-nested 48.0.0", "datafusion-spark", "futures", "hex", @@ -1464,6 +1462,27 @@ dependencies = [ "indexmap", "libc", "log", + "paste", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc", + "base64", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "libc", + "log", "object_store", "parquet", "paste", @@ -1474,9 +1493,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16015071202d6133bc84d72756176467e3e46029f3ce9ad2cb788f9b1ff139b2" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "futures", "log", @@ -1485,20 +1503,19 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b77523c95c89d2a7eb99df14ed31390e04ab29b43ff793e562bdc1716b07e17b" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "bytes", "chrono", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1515,21 +1532,20 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d25c5e2c0ebe8434beeea997b8e88d55b3ccc0d19344293f2373f65bc524fc" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "bytes", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1540,21 +1556,20 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dc6959e1155741ab35369e1dc7673ba30fc45ed568fad34c01b7cb1daeb4d4c" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "bytes", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1565,24 +1580,24 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7a6afdfe358d70f4237f60eaef26ae5a1ce7cb2c469d02d5fc6c7fd5d84e58b" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "bytes", "datafusion-catalog", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-functions-aggregate 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-pruning", "datafusion-session", "futures", "itertools 0.14.0", @@ -1600,6 +1615,11 @@ version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" +[[package]] +name = "datafusion-doc" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" + [[package]] name = "datafusion-execution" version = "48.0.0" @@ -1608,8 +1628,26 @@ checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" dependencies = [ "arrow", "dashmap", - "datafusion-common", - "datafusion-expr", + "datafusion-common 48.0.0", + "datafusion-expr 48.0.0", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.1", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-execution" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common 49.0.0", + "datafusion-expr 49.0.0", "futures", "log", "object_store", @@ -1627,12 +1665,32 @@ checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" dependencies = [ "arrow", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.0", + "datafusion-doc 48.0.0", + "datafusion-expr-common 48.0.0", + "datafusion-functions-aggregate-common 48.0.0", + "datafusion-functions-window-common 48.0.0", + "datafusion-physical-expr-common 48.0.0", + "indexmap", + "paste", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common 49.0.0", + "datafusion-doc 49.0.0", + "datafusion-expr-common 49.0.0", + "datafusion-functions-aggregate-common 49.0.0", + "datafusion-functions-window-common 49.0.0", + "datafusion-physical-expr-common 49.0.0", "indexmap", "paste", "serde_json", @@ -1646,7 +1704,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 48.0.0", + "indexmap", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-expr-common" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "arrow", + "datafusion-common 49.0.0", "indexmap", "itertools 0.14.0", "paste", @@ -1657,6 +1727,30 @@ name = "datafusion-functions" version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "chrono", + "datafusion-common 48.0.0", + "datafusion-doc 48.0.0", + "datafusion-execution 48.0.0", + "datafusion-expr 48.0.0", + "datafusion-expr-common 48.0.0", + "datafusion-macros 48.0.0", + "hex", + "itertools 0.14.0", + "log", + "rand 0.9.1", + "regex", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "arrow-buffer", @@ -1664,12 +1758,12 @@ dependencies = [ "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", + "datafusion-common 49.0.0", + "datafusion-doc 49.0.0", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-expr-common 49.0.0", + "datafusion-macros 49.0.0", "hex", "itertools 0.14.0", "log", @@ -1689,14 +1783,34 @@ checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 48.0.0", + "datafusion-doc 48.0.0", + "datafusion-execution 48.0.0", + "datafusion-expr 48.0.0", + "datafusion-functions-aggregate-common 48.0.0", + "datafusion-macros 48.0.0", + "datafusion-physical-expr 48.0.0", + "datafusion-physical-expr-common 48.0.0", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 49.0.0", + "datafusion-doc 49.0.0", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-functions-aggregate-common 49.0.0", + "datafusion-macros 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "half", "log", "paste", @@ -1710,9 +1824,21 @@ checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.0", + "datafusion-expr-common 48.0.0", + "datafusion-physical-expr-common 48.0.0", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 49.0.0", + "datafusion-expr-common 49.0.0", + "datafusion-physical-expr-common 49.0.0", ] [[package]] @@ -1723,14 +1849,35 @@ checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" dependencies = [ "arrow", "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-macros", - "datafusion-physical-expr-common", + "datafusion-common 48.0.0", + "datafusion-doc 48.0.0", + "datafusion-execution 48.0.0", + "datafusion-expr 48.0.0", + "datafusion-functions 48.0.0", + "datafusion-functions-aggregate 48.0.0", + "datafusion-macros 48.0.0", + "datafusion-physical-expr-common 48.0.0", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-nested" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common 49.0.0", + "datafusion-doc 49.0.0", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-functions 49.0.0", + "datafusion-functions-aggregate 49.0.0", + "datafusion-functions-aggregate-common 49.0.0", + "datafusion-macros 49.0.0", + "datafusion-physical-expr-common 49.0.0", "itertools 0.14.0", "log", "paste", @@ -1738,15 +1885,14 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de2fc6c2946da5cab8364fb28b5cac3115f0f3a87960b235ed031c3f7e2e639b" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common", - "datafusion-expr", + "datafusion-common 49.0.0", + "datafusion-expr 49.0.0", "datafusion-physical-plan", "parking_lot", "paste", @@ -1754,18 +1900,17 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e5746548a8544870a119f556543adcd88fe0ba6b93723fe78ad0439e0fbb8b4" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 49.0.0", + "datafusion-doc 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-functions-window-common 49.0.0", + "datafusion-macros 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "log", "paste", ] @@ -1776,8 +1921,17 @@ version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.0", + "datafusion-physical-expr-common 48.0.0", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "datafusion-common 49.0.0", + "datafusion-physical-expr-common 49.0.0", ] [[package]] @@ -1786,22 +1940,32 @@ version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" dependencies = [ - "datafusion-expr", + "datafusion-expr 48.0.0", + "quote", + "syn 2.0.104", +] + +[[package]] +name = "datafusion-macros" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "datafusion-expr 49.0.0", "quote", "syn 2.0.104", ] [[package]] name = "datafusion-optimizer" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cfaacf06445dc3bbc1e901242d2a44f2cae99a744f49f3fefddcee46240058" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-common 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-expr-common 49.0.0", + "datafusion-physical-expr 49.0.0", "indexmap", "itertools 0.14.0", "log", @@ -1817,11 +1981,32 @@ checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.0", + "datafusion-expr 48.0.0", + "datafusion-expr-common 48.0.0", + "datafusion-functions-aggregate-common 48.0.0", + "datafusion-physical-expr-common 48.0.0", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", + "log", + "paste", + "petgraph 0.8.2", +] + +[[package]] +name = "datafusion-physical-expr" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-expr-common 49.0.0", + "datafusion-functions-aggregate-common 49.0.0", + "datafusion-physical-expr-common 49.0.0", "half", "hashbrown 0.14.5", "indexmap", @@ -1839,35 +2024,47 @@ checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr-common", + "datafusion-common 48.0.0", + "datafusion-expr-common 48.0.0", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 49.0.0", + "datafusion-expr-common 49.0.0", "hashbrown 0.14.5", "itertools 0.14.0", ] [[package]] name = "datafusion-physical-optimizer" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4371cc4ad33978cc2a8be93bd54a232d3f2857b50401a14631c0705f3f910aae" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 49.0.0", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-expr-common 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "datafusion-physical-plan", + "datafusion-pruning", "itertools 0.14.0", "log", ] [[package]] name = "datafusion-physical-plan" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc47bc33025757a5c11f2cd094c5b6b5ed87f46fa33c023e6fdfa25fcbfade23" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "ahash", "arrow", @@ -1875,13 +2072,13 @@ dependencies = [ "arrow-schema", "async-trait", "chrono", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-functions-window-common 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", "futures", "half", "hashbrown 0.14.5", @@ -1893,20 +2090,36 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common 49.0.0", + "datafusion-datasource", + "datafusion-expr-common 49.0.0", + "datafusion-physical-expr 49.0.0", + "datafusion-physical-expr-common 49.0.0", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + [[package]] name = "datafusion-session" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7485da32283985d6b45bd7d13a65169dcbe8c869e25d01b2cfbc425254b4b49" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common", + "datafusion-common 49.0.0", "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-physical-expr 49.0.0", "datafusion-physical-plan", "datafusion-sql", "futures", @@ -1919,30 +2132,28 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00825d8e84c5a96e21ea5e525a6a4ec8eea7d623942f13031b519ca596802e90" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "datafusion-catalog", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-macros", + "datafusion-common 49.0.0", + "datafusion-execution 49.0.0", + "datafusion-expr 49.0.0", + "datafusion-functions 49.0.0", + "datafusion-macros 49.0.0", "log", ] [[package]] name = "datafusion-sql" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a466b15632befddfeac68c125f0260f569ff315c6831538cbb40db754134e0df" +version = "49.0.0" +source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", "bigdecimal", - "datafusion-common", - "datafusion-expr", + "datafusion-common 49.0.0", + "datafusion-expr 49.0.0", "indexmap", "log", "regex", @@ -2673,6 +2884,17 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags 2.9.1", + "cfg-if", + "libc", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -3187,9 +3409,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781f96d79ed0f961a7021424ab01840efbda64ae7a505aaea195efc91eaaec4" +checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" dependencies = [ "async-trait", "base64", @@ -3206,7 +3428,7 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.37.5", + "quick-xml 0.38.0", "rand 0.9.1", "reqwest", "ring", @@ -3306,6 +3528,7 @@ dependencies = [ "num-bigint", "object_store", "paste", + "ring", "seq-macro", "simdutf8", "snap", @@ -3610,9 +3833,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.37.5" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "8927b0664f5c5a98265138b7e3f90aa19a6b21353182469ace36d4ac527b7b1b" dependencies = [ "memchr", "serde", @@ -4522,17 +4745,19 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.45.1" +version = "1.46.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" dependencies = [ "backtrace", "bytes", + "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", + "slab", "socket2", "tokio-macros", "windows-sys 0.52.0", diff --git a/native/Cargo.toml b/native/Cargo.toml index 54e459646c..c9654bd12d 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -34,12 +34,12 @@ edition = "2021" rust-version = "1.85" [workspace.dependencies] -arrow = { version = "55.1.0", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow = { version = "55.2.0", features = ["prettyprint", "ffi", "chrono-tz"] } async-trait = { version = "0.1" } bytes = { version = "1.10.0" } -parquet = { version = "55.1.0", default-features = false, features = ["experimental"] } -datafusion = { version = "48.0.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } -datafusion-spark = { version = "48.0.0" } +parquet = { version = "55.2.0", default-features = false, features = ["experimental"] } +datafusion = { git = "https://github.com/apache/datafusion.git", branch = "branch-49", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] } +datafusion-spark = { git = "https://github.com/apache/datafusion.git", branch = "branch-49" } datafusion-comet-spark-expr = { path = "spark-expr" } datafusion-comet-proto = { path = "proto" } chrono = { version = "0.4", default-features = false, features = ["clock"] } @@ -49,7 +49,7 @@ num = "0.4" rand = "0.9" regex = "1.9.6" thiserror = "2" -object_store = { version = "0.12.0", features = ["gcp", "azure", "aws", "http"] } +object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] } url = "2.2" aws-config = "1.6.3" aws-credential-types = "1.2.3" diff --git a/native/core/src/execution/operators/filter.rs b/native/core/src/execution/operators/filter.rs index 573287a088..ddc21c0411 100644 --- a/native/core/src/execution/operators/filter.rs +++ b/native/core/src/execution/operators/filter.rs @@ -211,22 +211,18 @@ impl FilterExec { if let Some(binary) = conjunction.as_any().downcast_ref::() { if binary.op() == &Operator::Eq { // Filter evaluates to single value for all partitions - if input_eqs.is_expr_constant(binary.left()) { + if input_eqs.is_expr_constant(binary.left()).is_some() { let (expr, across_parts) = ( binary.right(), input_eqs.get_expr_constant_value(binary.right()), ); - res_constants.push( - ConstExpr::new(Arc::clone(expr)).with_across_partitions(across_parts), - ); - } else if input_eqs.is_expr_constant(binary.right()) { + res_constants.push(ConstExpr::new(Arc::clone(expr), across_parts)); + } else if input_eqs.is_expr_constant(binary.right()).is_some() { let (expr, across_parts) = ( binary.left(), input_eqs.get_expr_constant_value(binary.left()), ); - res_constants.push( - ConstExpr::new(Arc::clone(expr)).with_across_partitions(across_parts), - ); + res_constants.push(ConstExpr::new(Arc::clone(expr), across_parts)); } } } @@ -246,7 +242,7 @@ impl FilterExec { let mut eq_properties = input.equivalence_properties().clone(); let (equal_pairs, _) = collect_columns_from_predicate(predicate); for (lhs, rhs) in equal_pairs { - eq_properties.add_equal_conditions(lhs, rhs)? + eq_properties.add_equal_conditions(Arc::clone(lhs), Arc::clone(rhs))? } // Add the columns that have only one viable value (singleton) after // filtering to constants. @@ -258,14 +254,13 @@ impl FilterExec { .min_value .get_value(); let expr = Arc::new(column) as _; - ConstExpr::new(expr) - .with_across_partitions(AcrossPartitions::Uniform(value.cloned())) + ConstExpr::new(expr, AcrossPartitions::Uniform(value.cloned())) }); // This is for statistics - eq_properties = eq_properties.with_constants(constants); + eq_properties.add_constants(constants)?; // This is for logical constant (for example: a = '1', then a could be marked as a constant) - // to do: how to deal with multiple situation to represent = (for example c1 between 0 and 0) - eq_properties = eq_properties.with_constants(Self::extend_constants(input, predicate)); + // to do: how to deal with a multiple situation to represent = (for example, c1 between 0 and 0) + eq_properties.add_constants(Self::extend_constants(input, predicate))?; let mut output_partitioning = input.output_partitioning().clone(); // If contains projection, update the PlanProperties. diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 1b1c1ae570..3d82a9db99 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -72,7 +72,7 @@ use crate::parquet::parquet_support::prepare_object_store_with_configs; use datafusion::common::scalar::ScalarStructBuilder; use datafusion::common::{ tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter}, - JoinType as DFJoinType, ScalarValue, + JoinType as DFJoinType, NullEquality, ScalarValue, }; use datafusion::datasource::listing::PartitionedFile; use datafusion::logical_expr::type_coercion::other::get_coerce_type_for_case_expression; @@ -1146,7 +1146,7 @@ impl PhysicalPlanner { let child_copied = Self::wrap_in_copy_exec(Arc::clone(&child.native_plan)); let sort = Arc::new( - SortExec::new(LexOrdering::new(exprs?), Arc::clone(&child_copied)) + SortExec::new(LexOrdering::new(exprs?).unwrap(), Arc::clone(&child_copied)) .with_fetch(fetch), ); @@ -1422,7 +1422,7 @@ impl PhysicalPlanner { sort_options, // null doesn't equal to null in Spark join key. If the join key is // `EqualNullSafe`, Spark will rewrite it during planning. - false, + NullEquality::NullEqualsNothing, )?); if join.filter.is_some() { @@ -1490,7 +1490,7 @@ impl PhysicalPlanner { PartitionMode::Partitioned, // null doesn't equal to null in Spark join key. If the join key is // `EqualNullSafe`, Spark will rewrite it during planning. - false, + NullEquality::NullEqualsNothing, )?); // If the hash join is build right, we need to swap the left and right @@ -2192,7 +2192,7 @@ impl PhysicalPlanner { window_func_name, &window_args, partition_by, - &LexOrdering::new(sort_exprs.to_vec()), + &LexOrdering::new(sort_exprs.to_vec()).unwrap(), window_frame.into(), input_schema.as_ref(), false, // TODO: Ignore nulls @@ -2273,7 +2273,7 @@ impl PhysicalPlanner { .iter() .map(|expr| self.create_sort_expr(expr, Arc::clone(&input_schema))) .collect(); - let lex_ordering = LexOrdering::from(exprs?); + let lex_ordering = LexOrdering::new(exprs?).unwrap(); Ok(CometPartitioning::RangePartitioning( lex_ordering, range_partition.num_partitions as usize, diff --git a/native/core/src/execution/shuffle/shuffle_writer.rs b/native/core/src/execution/shuffle/shuffle_writer.rs index d23d02a2c8..6dd528c8f3 100644 --- a/native/core/src/execution/shuffle/shuffle_writer.rs +++ b/native/core/src/execution/shuffle/shuffle_writer.rs @@ -934,7 +934,7 @@ impl SinglePartitionShufflePartitioner { Ok(Some(concatenated)) } Err(e) => Err(DataFusionError::ArrowError( - e, + Box::from(e), Some(DataFusionError::get_back_trace()), )), } @@ -1122,7 +1122,7 @@ impl Iterator for PartitionedBatchIterator<'_> { Some(Ok(batch)) } Err(e) => Some(Err(DataFusionError::ArrowError( - e, + Box::from(e), Some(DataFusionError::get_back_trace()), ))), } From 3604364e7c2b21c179b893c35dfaca895f4fd64a Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 23 Jul 2025 08:15:26 -0700 Subject: [PATCH 2/8] chore: migrate to DF 49.0.0 --- native/core/src/execution/operators/filter.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/native/core/src/execution/operators/filter.rs b/native/core/src/execution/operators/filter.rs index ddc21c0411..4e28145cce 100644 --- a/native/core/src/execution/operators/filter.rs +++ b/native/core/src/execution/operators/filter.rs @@ -212,17 +212,15 @@ impl FilterExec { if binary.op() == &Operator::Eq { // Filter evaluates to single value for all partitions if input_eqs.is_expr_constant(binary.left()).is_some() { - let (expr, across_parts) = ( - binary.right(), - input_eqs.get_expr_constant_value(binary.right()), - ); - res_constants.push(ConstExpr::new(Arc::clone(expr), across_parts)); + let across = input_eqs + .is_expr_constant(binary.right()) + .unwrap_or_default(); + res_constants.push(ConstExpr::new(Arc::clone(binary.right()), across)); } else if input_eqs.is_expr_constant(binary.right()).is_some() { - let (expr, across_parts) = ( - binary.left(), - input_eqs.get_expr_constant_value(binary.left()), - ); - res_constants.push(ConstExpr::new(Arc::clone(expr), across_parts)); + let across = input_eqs + .is_expr_constant(binary.left()) + .unwrap_or_default(); + res_constants.push(ConstExpr::new(Arc::clone(binary.left()), across)); } } } From 2f6e27e54fabf359bda53c776b7df020cc9294fa Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 23 Jul 2025 08:33:58 -0700 Subject: [PATCH 3/8] chore: migrate to DF 49.0.0 --- native/Cargo.lock | 509 +++++------------- native/core/Cargo.toml | 2 +- native/core/benches/shuffle_writer.rs | 3 +- .../execution/shuffle/comet_partitioning.rs | 2 +- .../execution/shuffle/range_partitioner.rs | 2 +- .../src/execution/shuffle/shuffle_writer.rs | 3 +- native/hdfs/src/object_store/hdfs.rs | 4 +- 7 files changed, 134 insertions(+), 391 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 2719fe1654..72ed8cd625 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -1275,23 +1275,23 @@ dependencies = [ "chrono", "datafusion-catalog", "datafusion-catalog-listing", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-expr-common 49.0.0", - "datafusion-functions 49.0.0", - "datafusion-functions-aggregate 49.0.0", - "datafusion-functions-nested 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-session", @@ -1319,12 +1319,12 @@ dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-physical-expr 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", "datafusion-sql", @@ -1344,12 +1344,12 @@ dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-datasource", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1374,7 +1374,7 @@ dependencies = [ "datafusion-comet-objectstore-hdfs", "datafusion-comet-proto", "datafusion-comet-spark-expr", - "datafusion-functions-nested 48.0.0", + "datafusion-functions-nested", "datafusion-spark", "futures", "hex", @@ -1447,27 +1447,6 @@ dependencies = [ "twox-hash", ] -[[package]] -name = "datafusion-common" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3d56b2ac9f476b93ca82e4ef5fb00769c8a3f248d12b4965af7e27635fa7e12" -dependencies = [ - "ahash", - "arrow", - "arrow-ipc", - "base64", - "half", - "hashbrown 0.14.5", - "indexmap", - "libc", - "log", - "paste", - "sqlparser", - "tokio", - "web-time", -] - [[package]] name = "datafusion-common" version = "49.0.0" @@ -1510,12 +1489,12 @@ dependencies = [ "async-trait", "bytes", "chrono", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1539,13 +1518,13 @@ dependencies = [ "async-trait", "bytes", "datafusion-catalog", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1563,13 +1542,13 @@ dependencies = [ "async-trait", "bytes", "datafusion-catalog", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", @@ -1587,14 +1566,14 @@ dependencies = [ "async-trait", "bytes", "datafusion-catalog", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-functions-aggregate 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", @@ -1609,36 +1588,11 @@ dependencies = [ "tokio", ] -[[package]] -name = "datafusion-doc" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" - [[package]] name = "datafusion-doc" version = "49.0.0" source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" -[[package]] -name = "datafusion-execution" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" -dependencies = [ - "arrow", - "dashmap", - "datafusion-common 48.0.0", - "datafusion-expr 48.0.0", - "futures", - "log", - "object_store", - "parking_lot", - "rand 0.9.1", - "tempfile", - "url", -] - [[package]] name = "datafusion-execution" version = "49.0.0" @@ -1646,8 +1600,8 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "arrow", "dashmap", - "datafusion-common 49.0.0", - "datafusion-expr 49.0.0", + "datafusion-common", + "datafusion-expr", "futures", "log", "object_store", @@ -1657,26 +1611,6 @@ dependencies = [ "url", ] -[[package]] -name = "datafusion-expr" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" -dependencies = [ - "arrow", - "chrono", - "datafusion-common 48.0.0", - "datafusion-doc 48.0.0", - "datafusion-expr-common 48.0.0", - "datafusion-functions-aggregate-common 48.0.0", - "datafusion-functions-window-common 48.0.0", - "datafusion-physical-expr-common 48.0.0", - "indexmap", - "paste", - "serde_json", - "sqlparser", -] - [[package]] name = "datafusion-expr" version = "49.0.0" @@ -1685,68 +1619,30 @@ dependencies = [ "arrow", "async-trait", "chrono", - "datafusion-common 49.0.0", - "datafusion-doc 49.0.0", - "datafusion-expr-common 49.0.0", - "datafusion-functions-aggregate-common 49.0.0", - "datafusion-functions-window-common 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", "indexmap", "paste", "serde_json", "sqlparser", ] -[[package]] -name = "datafusion-expr-common" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" -dependencies = [ - "arrow", - "datafusion-common 48.0.0", - "indexmap", - "itertools 0.14.0", - "paste", -] - [[package]] name = "datafusion-expr-common" version = "49.0.0" source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", - "datafusion-common 49.0.0", + "datafusion-common", "indexmap", "itertools 0.14.0", "paste", ] -[[package]] -name = "datafusion-functions" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" -dependencies = [ - "arrow", - "arrow-buffer", - "base64", - "chrono", - "datafusion-common 48.0.0", - "datafusion-doc 48.0.0", - "datafusion-execution 48.0.0", - "datafusion-expr 48.0.0", - "datafusion-expr-common 48.0.0", - "datafusion-macros 48.0.0", - "hex", - "itertools 0.14.0", - "log", - "rand 0.9.1", - "regex", - "unicode-segmentation", - "uuid", -] - [[package]] name = "datafusion-functions" version = "49.0.0" @@ -1758,12 +1654,12 @@ dependencies = [ "blake2", "blake3", "chrono", - "datafusion-common 49.0.0", - "datafusion-doc 49.0.0", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-expr-common 49.0.0", - "datafusion-macros 49.0.0", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hex", "itertools 0.14.0", "log", @@ -1775,27 +1671,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "datafusion-functions-aggregate" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.0", - "datafusion-doc 48.0.0", - "datafusion-execution 48.0.0", - "datafusion-expr 48.0.0", - "datafusion-functions-aggregate-common 48.0.0", - "datafusion-macros 48.0.0", - "datafusion-physical-expr 48.0.0", - "datafusion-physical-expr-common 48.0.0", - "half", - "log", - "paste", -] - [[package]] name = "datafusion-functions-aggregate" version = "49.0.0" @@ -1803,32 +1678,19 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "ahash", "arrow", - "datafusion-common 49.0.0", - "datafusion-doc 49.0.0", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-functions-aggregate-common 49.0.0", - "datafusion-macros 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "half", "log", "paste", ] -[[package]] -name = "datafusion-functions-aggregate-common" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.0", - "datafusion-expr-common 48.0.0", - "datafusion-physical-expr-common 48.0.0", -] - [[package]] name = "datafusion-functions-aggregate-common" version = "49.0.0" @@ -1836,30 +1698,9 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "ahash", "arrow", - "datafusion-common 49.0.0", - "datafusion-expr-common 49.0.0", - "datafusion-physical-expr-common 49.0.0", -] - -[[package]] -name = "datafusion-functions-nested" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" -dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common 48.0.0", - "datafusion-doc 48.0.0", - "datafusion-execution 48.0.0", - "datafusion-expr 48.0.0", - "datafusion-functions 48.0.0", - "datafusion-functions-aggregate 48.0.0", - "datafusion-macros 48.0.0", - "datafusion-physical-expr-common 48.0.0", - "itertools 0.14.0", - "log", - "paste", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", ] [[package]] @@ -1869,15 +1710,15 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "arrow", "arrow-ord", - "datafusion-common 49.0.0", - "datafusion-doc 49.0.0", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-functions 49.0.0", - "datafusion-functions-aggregate 49.0.0", - "datafusion-functions-aggregate-common 49.0.0", - "datafusion-macros 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr-common", "itertools 0.14.0", "log", "paste", @@ -1891,8 +1732,8 @@ dependencies = [ "arrow", "async-trait", "datafusion-catalog", - "datafusion-common 49.0.0", - "datafusion-expr 49.0.0", + "datafusion-common", + "datafusion-expr", "datafusion-physical-plan", "parking_lot", "paste", @@ -1904,45 +1745,24 @@ version = "49.0.0" source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", - "datafusion-common 49.0.0", - "datafusion-doc 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-functions-window-common 49.0.0", - "datafusion-macros 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "log", "paste", ] -[[package]] -name = "datafusion-functions-window-common" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" -dependencies = [ - "datafusion-common 48.0.0", - "datafusion-physical-expr-common 48.0.0", -] - [[package]] name = "datafusion-functions-window-common" version = "49.0.0" source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ - "datafusion-common 49.0.0", - "datafusion-physical-expr-common 49.0.0", -] - -[[package]] -name = "datafusion-macros" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" -dependencies = [ - "datafusion-expr 48.0.0", - "quote", - "syn 2.0.104", + "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] @@ -1950,7 +1770,7 @@ name = "datafusion-macros" version = "49.0.0" source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ - "datafusion-expr 49.0.0", + "datafusion-expr", "quote", "syn 2.0.104", ] @@ -1962,10 +1782,10 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "arrow", "chrono", - "datafusion-common 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-expr-common 49.0.0", - "datafusion-physical-expr 49.0.0", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", "indexmap", "itertools 0.14.0", "log", @@ -1973,28 +1793,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "datafusion-physical-expr" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.0", - "datafusion-expr 48.0.0", - "datafusion-expr-common 48.0.0", - "datafusion-functions-aggregate-common 48.0.0", - "datafusion-physical-expr-common 48.0.0", - "half", - "hashbrown 0.14.5", - "indexmap", - "itertools 0.14.0", - "log", - "paste", - "petgraph 0.8.2", -] - [[package]] name = "datafusion-physical-expr" version = "49.0.0" @@ -2002,11 +1800,11 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "ahash", "arrow", - "datafusion-common 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-expr-common 49.0.0", - "datafusion-functions-aggregate-common 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", "indexmap", @@ -2016,20 +1814,6 @@ dependencies = [ "petgraph 0.8.2", ] -[[package]] -name = "datafusion-physical-expr-common" -version = "48.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.0", - "datafusion-expr-common 48.0.0", - "hashbrown 0.14.5", - "itertools 0.14.0", -] - [[package]] name = "datafusion-physical-expr-common" version = "49.0.0" @@ -2037,8 +1821,8 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "ahash", "arrow", - "datafusion-common 49.0.0", - "datafusion-expr-common 49.0.0", + "datafusion-common", + "datafusion-expr-common", "hashbrown 0.14.5", "itertools 0.14.0", ] @@ -2049,12 +1833,12 @@ version = "49.0.0" source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999d0a1ab500f42a32251370f214f837d1e" dependencies = [ "arrow", - "datafusion-common 49.0.0", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-expr-common 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", @@ -2072,13 +1856,13 @@ dependencies = [ "arrow-schema", "async-trait", "chrono", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-functions-window-common 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "futures", "half", "hashbrown 0.14.5", @@ -2097,11 +1881,11 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "arrow", "arrow-schema", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-datasource", - "datafusion-expr-common 49.0.0", - "datafusion-physical-expr 49.0.0", - "datafusion-physical-expr-common 49.0.0", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "itertools 0.14.0", "log", @@ -2115,11 +1899,11 @@ dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common 49.0.0", + "datafusion-common", "datafusion-common-runtime", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-physical-expr 49.0.0", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-sql", "futures", @@ -2137,11 +1921,11 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "arrow", "datafusion-catalog", - "datafusion-common 49.0.0", - "datafusion-execution 49.0.0", - "datafusion-expr 49.0.0", - "datafusion-functions 49.0.0", - "datafusion-macros 49.0.0", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-macros", "log", ] @@ -2152,8 +1936,8 @@ source = "git+https://github.com/apache/datafusion.git?branch=branch-49#afb90999 dependencies = [ "arrow", "bigdecimal", - "datafusion-common 49.0.0", - "datafusion-expr 49.0.0", + "datafusion-common", + "datafusion-expr", "indexmap", "log", "regex", @@ -3813,15 +3597,6 @@ dependencies = [ "prost", ] -[[package]] -name = "psm" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" -dependencies = [ - "cc", -] - [[package]] name = "quick-xml" version = "0.26.0" @@ -3990,26 +3765,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.104", -] - [[package]] name = "redox_syscall" version = "0.5.13" @@ -4447,7 +4202,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", - "recursive", "sqlparser_derive", ] @@ -4468,19 +4222,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "stacker" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "static_assertions" version = "1.1.0" diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 9b6a36b8f7..d992c96912 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -84,7 +84,7 @@ jni = { version = "0.21", features = ["invocation"] } lazy_static = "1.4" assertables = "9" hex = "0.4.3" -datafusion-functions-nested = { version = "48.0.0" } +datafusion-functions-nested = { git = "https://github.com/apache/datafusion.git", branch = "branch-49" } [features] default = [] diff --git a/native/core/benches/shuffle_writer.rs b/native/core/benches/shuffle_writer.rs index ee07462d00..52638d92a9 100644 --- a/native/core/benches/shuffle_writer.rs +++ b/native/core/benches/shuffle_writer.rs @@ -89,7 +89,8 @@ fn criterion_benchmark(c: &mut Criterion) { CometPartitioning::RangePartitioning( LexOrdering::new(vec![PhysicalSortExpr::new_default( col("c0", batch.schema().as_ref()).unwrap(), - )]), + )]) + .unwrap(), 16, 100, ), diff --git a/native/core/src/execution/shuffle/comet_partitioning.rs b/native/core/src/execution/shuffle/comet_partitioning.rs index b063473bdf..9c33da8e93 100644 --- a/native/core/src/execution/shuffle/comet_partitioning.rs +++ b/native/core/src/execution/shuffle/comet_partitioning.rs @@ -24,7 +24,7 @@ pub enum CometPartitioning { /// Allocate rows based on a hash of one of more expressions and the specified number of /// partitions Hash(Vec>, usize), - /// Allocate rows based on lexical order of one of more expressions and the specified number of + /// Allocate rows based on the lexical order of one of more expressions and the specified number of /// partitions RangePartitioning(LexOrdering, usize, usize), } diff --git a/native/core/src/execution/shuffle/range_partitioner.rs b/native/core/src/execution/shuffle/range_partitioner.rs index 8f443232f9..e14cf61482 100644 --- a/native/core/src/execution/shuffle/range_partitioner.rs +++ b/native/core/src/execution/shuffle/range_partitioner.rs @@ -247,7 +247,7 @@ mod test { let (rows, row_converter) = RangePartitioner::generate_bounds( input_batch.columns().to_vec().as_ref(), - &lex_ordering, + &lex_ordering.unwrap(), 10, input_batch.num_rows(), 1000, diff --git a/native/core/src/execution/shuffle/shuffle_writer.rs b/native/core/src/execution/shuffle/shuffle_writer.rs index 6dd528c8f3..669a889ed8 100644 --- a/native/core/src/execution/shuffle/shuffle_writer.rs +++ b/native/core/src/execution/shuffle/shuffle_writer.rs @@ -1409,7 +1409,8 @@ mod test { CometPartitioning::RangePartitioning( LexOrdering::new(vec![PhysicalSortExpr::new_default( col("a", batch.schema().as_ref()).unwrap(), - )]), + )]) + .unwrap(), num_partitions, 100, ), diff --git a/native/hdfs/src/object_store/hdfs.rs b/native/hdfs/src/object_store/hdfs.rs index bb03001082..b49e879429 100644 --- a/native/hdfs/src/object_store/hdfs.rs +++ b/native/hdfs/src/object_store/hdfs.rs @@ -32,7 +32,7 @@ use hdfs::walkdir::HdfsWalkDir; use object_store::{ path::{self, Path}, Error, GetOptions, GetRange, GetResult, GetResultPayload, ListResult, MultipartUpload, - ObjectMeta, ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, + ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, }; /// scheme for HDFS File System @@ -139,7 +139,7 @@ impl ObjectStore for HadoopFileSystem { async fn put_multipart_opts( &self, _location: &Path, - _opts: PutMultipartOpts, + _opts: PutMultipartOptions, ) -> object_store::Result> { unimplemented!() } From 38c770567e9465867f7930935fc543b4258b9c5c Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 23 Jul 2025 11:35:55 -0700 Subject: [PATCH 4/8] chore: migrate to DF 49.0.0 --- native/core/src/execution/planner.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 3d82a9db99..f731fb41f9 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -2186,13 +2186,15 @@ impl PhysicalPlanner { }; let window_frame = WindowFrame::new_bounds(units, lower_bound, upper_bound); + let lex_orderings = LexOrdering::new(sort_exprs.to_vec()); + let sort_phy_exprs = lex_orderings.as_deref().unwrap_or(&[]); datafusion::physical_plan::windows::create_window_expr( &window_func, window_func_name, &window_args, partition_by, - &LexOrdering::new(sort_exprs.to_vec()).unwrap(), + sort_phy_exprs, window_frame.into(), input_schema.as_ref(), false, // TODO: Ignore nulls From d614dcaa77f3221b5e30e2d59a1bc979ab750280 Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 23 Jul 2025 13:35:39 -0700 Subject: [PATCH 5/8] fixes --- native/core/src/execution/planner.rs | 8 ++++++++ native/spark-expr/src/conversion_funcs/cast.rs | 1 + 2 files changed, 9 insertions(+) diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index f731fb41f9..85c97cd5f8 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -593,6 +593,14 @@ impl PhysicalPlanner { true, false, ))), + // DataFusion 49 hardcodes return type for MD5 built in function as UTF8View + // which is not yet supported in Comet + // Converting forcibly to UTF8. To be removed after UTF8View supported + "md5" => Ok(Arc::new(Cast::new( + func?, + DataType::Utf8, + SparkCastOptions::new_without_timezone(EvalMode::Try, true), + ))), _ => func, } } diff --git a/native/spark-expr/src/conversion_funcs/cast.rs b/native/spark-expr/src/conversion_funcs/cast.rs index e0bc5f39fc..84a7313c63 100644 --- a/native/spark-expr/src/conversion_funcs/cast.rs +++ b/native/spark-expr/src/conversion_funcs/cast.rs @@ -960,6 +960,7 @@ fn cast_array( { spark_cast_nonintegral_numeric_to_integral(&array, eval_mode, from_type, to_type) } + (Utf8View, Utf8) => Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?), (Struct(_), Utf8) => Ok(casts_struct_to_string(array.as_struct(), cast_options)?), (Struct(_), Struct(_)) => Ok(cast_struct_to_struct( array.as_struct(), From c47e422419670f9e681aed920e012eba726e72e9 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 25 Jul 2025 15:41:02 -0700 Subject: [PATCH 6/8] fixes --- .../apache/comet/exec/CometAggregateSuite.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala index be7fe7ee52..993716a9c8 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala @@ -852,6 +852,23 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + // Temp ignore until https://github.com/apache/datafusion/pull/16918 fixed + ignore("first/last with ignore null1") { + val data = Range(0, 3).flatMap(n => Seq((n, 1), (n, 2))).toDF("a", "b") + withTempDir { dir => + val filename = s"${dir.getAbsolutePath}/first_last_ignore_null.parquet" + data.repartition(2).write.parquet(filename) + withSQLConf(CometConf.COMET_BATCH_SIZE.key -> "1") { + spark.read.parquet(filename).createOrReplaceTempView("t1") + for (expr <- Seq("first", "last")) { + // deterministic query that should return one non-null value per group + val df = spark.sql(s"SELECT a, $expr(IF(b==1,null,b)) IGNORE NULLS FROM t1 GROUP BY a") + checkSparkAnswerAndOperator(df) + } + } + } + } + test("all types, with nulls") { val numValues = 2048 From 17a1470c5bcc02c802d5f8c8cc2777628bf197c6 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 25 Jul 2025 16:59:29 -0700 Subject: [PATCH 7/8] fixes --- .../comet/exec/CometAggregateSuite.scala | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala index 993716a9c8..375d76b6dc 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala @@ -835,6 +835,7 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + // Temp ignore until https://github.com/apache/datafusion/pull/16918 fixed test("first/last with ignore null") { val data = Range(0, 8192).flatMap(n => Seq((n, 1), (n, 2))).toDF("a", "b") withTempDir { dir => @@ -852,23 +853,6 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - // Temp ignore until https://github.com/apache/datafusion/pull/16918 fixed - ignore("first/last with ignore null1") { - val data = Range(0, 3).flatMap(n => Seq((n, 1), (n, 2))).toDF("a", "b") - withTempDir { dir => - val filename = s"${dir.getAbsolutePath}/first_last_ignore_null.parquet" - data.repartition(2).write.parquet(filename) - withSQLConf(CometConf.COMET_BATCH_SIZE.key -> "1") { - spark.read.parquet(filename).createOrReplaceTempView("t1") - for (expr <- Seq("first", "last")) { - // deterministic query that should return one non-null value per group - val df = spark.sql(s"SELECT a, $expr(IF(b==1,null,b)) IGNORE NULLS FROM t1 GROUP BY a") - checkSparkAnswerAndOperator(df) - } - } - } - } - test("all types, with nulls") { val numValues = 2048 From fa96ad69763e4c6c705f116b647381d837603b80 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 25 Jul 2025 20:23:00 -0700 Subject: [PATCH 8/8] fixes --- .../test/scala/org/apache/comet/exec/CometAggregateSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala index 375d76b6dc..f0a75a14fd 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala @@ -836,7 +836,7 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { } // Temp ignore until https://github.com/apache/datafusion/pull/16918 fixed - test("first/last with ignore null") { + ignore("first/last with ignore null") { val data = Range(0, 8192).flatMap(n => Seq((n, 1), (n, 2))).toDF("a", "b") withTempDir { dir => val filename = s"${dir.getAbsolutePath}/first_last_ignore_null.parquet"