diff --git a/.github_changelog_generator b/.github_changelog_generator deleted file mode 100644 index 45eef2f51836..000000000000 --- a/.github_changelog_generator +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# some issues are just documentation -add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]},"performance":{"prefix":"**Performance improvements:**","labels":["performance"]}} -# uncomment to not show PRs. TBD if we shown them or not. 
-#pull-requests=false -# so that the component is shown associated with the issue -issue-line-labels=sql -exclude-labels=development-process,invalid -breaking-labels=api change diff --git a/Cargo.toml b/Cargo.toml index 83faf4b6a8f2..65ef191d7421 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" rust-version = "1.73" -version = "38.0.0" +version = "39.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -86,23 +86,23 @@ bytes = "1.4" chrono = { version = "0.4.34", default-features = false } ctor = "0.2.0" dashmap = "5.5.0" -datafusion = { path = "datafusion/core", version = "38.0.0", default-features = false } -datafusion-common = { path = "datafusion/common", version = "38.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "38.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "38.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "38.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "38.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "38.0.0" } -datafusion-functions-array = { path = "datafusion/functions-array", version = "38.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "38.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "38.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "38.0.0", default-features = false } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "38.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "38.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "38.0.0" } -datafusion-sql = 
{ path = "datafusion/sql", version = "38.0.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "38.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "38.0.0" } +datafusion = { path = "datafusion/core", version = "39.0.0", default-features = false } +datafusion-common = { path = "datafusion/common", version = "39.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "39.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "39.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "39.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "39.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "39.0.0" } +datafusion-functions-array = { path = "datafusion/functions-array", version = "39.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "39.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "39.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "39.0.0", default-features = false } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "39.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "39.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "39.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "39.0.0" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "39.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "39.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 04ced84d9950..110e5c3194c7 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -875,9 +875,9 @@ dependencies = [ [[package]] name = "cc" 
-version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" dependencies = [ "jobserver", "libc", @@ -1123,7 +1123,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "38.0.0" +version = "39.0.0" dependencies = [ "ahash", "apache-avro", @@ -1177,7 +1177,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "38.0.0" +version = "39.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1204,7 +1204,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "38.0.0" +version = "39.0.0" dependencies = [ "ahash", "apache-avro", @@ -1225,14 +1225,14 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "38.0.0" +version = "39.0.0" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "38.0.0" +version = "39.0.0" dependencies = [ "arrow", "chrono", @@ -1251,7 +1251,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "38.0.0" +version = "39.0.0" dependencies = [ "ahash", "arrow", @@ -1268,7 +1268,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "38.0.0" +version = "39.0.0" dependencies = [ "arrow", "base64 0.22.1", @@ -1293,7 +1293,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "38.0.0" +version = "39.0.0" dependencies = [ "ahash", "arrow", @@ -1309,7 +1309,7 @@ dependencies = [ [[package]] name = "datafusion-functions-array" -version = "38.0.0" +version = "39.0.0" dependencies = [ "arrow", "arrow-array", @@ -1327,7 +1327,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "38.0.0" +version = "39.0.0" dependencies = [ "arrow", "async-trait", @@ -1344,7 +1344,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "38.0.0" +version = "39.0.0" dependencies = [ 
"ahash", "arrow", @@ -1373,7 +1373,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "38.0.0" +version = "39.0.0" dependencies = [ "arrow", "datafusion-common", @@ -1383,7 +1383,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "38.0.0" +version = "39.0.0" dependencies = [ "ahash", "arrow", @@ -1415,7 +1415,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "38.0.0" +version = "39.0.0" dependencies = [ "arrow", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 5578d7fe5839..5e393246b958 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "38.0.0" +version = "39.0.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -35,7 +35,7 @@ async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "38.0.0", features = [ +datafusion = { path = "../datafusion/core", version = "39.0.0", features = [ "avro", "crypto_expressions", "datetime_expressions", diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index 04de1efe5a81..71c6689c0cbd 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,41 +19,4 @@ # Changelog -- [38.0.0](../dev/changelog/38.0.0.md) -- [37.1.0](../dev/changelog/37.1.0.md) -- [37.0.0](../dev/changelog/37.0.0.md) -- [36.0.0](../dev/changelog/36.0.0.md) -- [35.0.0](../dev/changelog/35.0.0.md) -- [34.0.0](../dev/changelog/34.0.0.md) -- [33.0.0](../dev/changelog/33.0.0.md) -- [32.0.0](../dev/changelog/32.0.0.md) -- [31.0.0](../dev/changelog/31.0.0.md) -- [30.0.0](../dev/changelog/30.0.0.md) -- [29.0.0](../dev/changelog/29.0.0.md) -- [28.0.0](../dev/changelog/28.0.0.md) -- 
[27.0.0](../dev/changelog/27.0.0.md) -- [26.0.0](../dev/changelog/26.0.0.md) -- [25.0.0](../dev/changelog/25.0.0.md) -- [24.0.0](../dev/changelog/24.0.0.md) -- [23.0.0](../dev/changelog/23.0.0.md) -- [22.0.0](../dev/changelog/22.0.0.md) -- [21.1.0](../dev/changelog/21.1.0.md) -- [21.0.0](../dev/changelog/21.0.0.md) -- [20.0.0](../dev/changelog/20.0.0.md) -- [19.0.0](../dev/changelog/19.0.0.md) -- [18.0.0](../dev/changelog/18.0.0.md) -- [17.0.0](../dev/changelog/17.0.0.md) -- [16.1.0](../dev/changelog/16.1.0.md) -- [16.0.0](../dev/changelog/16.0.0.md) -- [15.0.0](../dev/changelog/15.0.0.md) -- [14.0.0](../dev/changelog/14.0.0.md) -- [13.0.0](../dev/changelog/13.0.0.md) -- [12.0.0](../dev/changelog/12.0.0.md) -- [11.0.0](../dev/changelog/11.0.0.md) -- [10.0.0](../dev/changelog/10.0.0.md) -- [9.0.0](../dev/changelog/9.0.0.md) -- [8.0.0](../dev/changelog/8.0.0.md) -- [7.1.0](../dev/changelog/7.1.0.md) -- [7.0.0](../dev/changelog/7.0.0.md) -- [6.0.0](../dev/changelog/6.0.0.md) -- [5.0.0](../dev/changelog/5.0.0.md) +Change logs for each release can be found [here](https://github.com/apache/datafusion/tree/main/dev/changelog). 
diff --git a/dev/changelog/39.0.0.md b/dev/changelog/39.0.0.md new file mode 100644 index 000000000000..f94e34592c72 --- /dev/null +++ b/dev/changelog/39.0.0.md @@ -0,0 +1,303 @@ + + +## [39.0.0](https://github.com/apache/datafusion/tree/39.0.0) (2024-06-07) + +**Breaking changes:** + +- Remove ScalarFunctionDefinition [#10325](https://github.com/apache/datafusion/pull/10325) (lewiszlw) +- Introduce user-defined signature [#10439](https://github.com/apache/datafusion/pull/10439) (jayzhan211) +- Remove `AggregateFunctionDefinition::Name` [#10441](https://github.com/apache/datafusion/pull/10441) (lewiszlw) +- Make `CREATE EXTERNAL TABLE` format options consistent, remove special syntax for `HEADER ROW`, `DELIMITER` and `COMPRESSION` [#10404](https://github.com/apache/datafusion/pull/10404) (berkaysynnada) +- feat: allow `array_slice` to take an optional stride parameter [#10469](https://github.com/apache/datafusion/pull/10469) (jonahgao) +- Minor: Extend more style of udaf `expr_fn`, Remove order args for`covar_samp` and `covar_pop` [#10492](https://github.com/apache/datafusion/pull/10492) (jayzhan211) +- Remove `file_type()` from `FileFormat` [#10499](https://github.com/apache/datafusion/pull/10499) (Jefffrey) +- UDAF: Extend more args to `state_fields` and `groups_accumulator_supported` and introduce `ReversedUDAF` [#10525](https://github.com/apache/datafusion/pull/10525) (jayzhan211) +- Remove `Expr::GetIndexedField`, replace `Expr::{field,index,range}` with `FieldAccessor`, `IndexAccessor`, and `SliceAccessor` [#10568](https://github.com/apache/datafusion/pull/10568) (jayzhan211) +- Improve ContextProvider [#10577](https://github.com/apache/datafusion/pull/10577) (lewiszlw) +- Minor: Use slice in `ConcreteTreeNode` [#10666](https://github.com/apache/datafusion/pull/10666) (peter-toth) +- Add reference visitor `TreeNode` APIs, change `ExecutionPlan::children()` and `PhysicalExpr::children()` return references 
[#10543](https://github.com/apache/datafusion/pull/10543) (peter-toth) +- Introduce Sum UDAF [#10651](https://github.com/apache/datafusion/pull/10651) (jayzhan211) + +**Implemented enhancements:** + +- feat: optional args for regexp\_\* UDFs [#10514](https://github.com/apache/datafusion/pull/10514) (Michael-J-Ward) +- feat: Expose Parquet Schema Adapter [#10515](https://github.com/apache/datafusion/pull/10515) (HawaiianSpork) +- feat: API for collecting statistics/index for metadata of a parquet file + tests [#10537](https://github.com/apache/datafusion/pull/10537) (NGA-TRAN) +- feat: Add eliminate group by constant optimizer rule [#10591](https://github.com/apache/datafusion/pull/10591) (korowa) +- feat: extend `unnest` to support Struct datatype [#10429](https://github.com/apache/datafusion/pull/10429) (duongcongtoai) +- feat: add substrait support for Interval types and literals [#10646](https://github.com/apache/datafusion/pull/10646) (waynexia) +- feat: support unparsing LogicalPlan::Window nodes [#10767](https://github.com/apache/datafusion/pull/10767) (devinjdangelo) +- feat: Update Parquet row filtering to handle type coercion [#10716](https://github.com/apache/datafusion/pull/10716) (jeffreyssmith2nd) + +**Fixed bugs:** + +- fix: make `columnize_expr` resistant to display_name collisions [#10459](https://github.com/apache/datafusion/pull/10459) (jonahgao) +- fix: avoid compressed json files repartitioning [#10470](https://github.com/apache/datafusion/pull/10470) (korowa) +- fix: parsing timestamp with date format [#10476](https://github.com/apache/datafusion/pull/10476) (shanretoo) +- fix: `array_slice` panics [#10547](https://github.com/apache/datafusion/pull/10547) (jonahgao) +- fix: pass `quote` parameter to CSV writer [#10671](https://github.com/apache/datafusion/pull/10671) (DDtKey) +- fix: CI compilation failed on substrait [#10683](https://github.com/apache/datafusion/pull/10683) (jonahgao) +- fix: fix string repeat for negative numbers 
[#10760](https://github.com/apache/datafusion/pull/10760) (tshauck) +- fix: `array_slice` and `array_element` panicked on empty args [#10804](https://github.com/apache/datafusion/pull/10804) (jonahgao) + +**Documentation updates:** + +- Prepare 38.0.0 release candidate 1 [#10407](https://github.com/apache/datafusion/pull/10407) (andygrove) +- chore(docs): update subquery documentation with more information [#10361](https://github.com/apache/datafusion/pull/10361) (sanderson) +- minor: Remove docs archive [#10416](https://github.com/apache/datafusion/pull/10416) (andygrove) +- Minor: format comments in `PushDownFilter` rule [#10437](https://github.com/apache/datafusion/pull/10437) (alamb) +- Minor: Add usecase to comments in `LogicalPlan::recompute_schema` [#10443](https://github.com/apache/datafusion/pull/10443) (alamb) +- doc: fix old master branch references to main [#10458](https://github.com/apache/datafusion/pull/10458) (Jefffrey) +- Minor: Improved document string for `LogicalPlanBuilder` [#10496](https://github.com/apache/datafusion/pull/10496) (AbrarNitk) +- Add to_date function to scalar functions doc [#10601](https://github.com/apache/datafusion/pull/10601) (Omega359) +- Docs: Update PR workflow documentation [#10532](https://github.com/apache/datafusion/pull/10532) (alamb) +- Minor: Add examples of using TreeNode with `Expr` [#10686](https://github.com/apache/datafusion/pull/10686) (alamb) +- docs: add documents to substrait type variation consts [#10719](https://github.com/apache/datafusion/pull/10719) (waynexia) +- Minor: (Doc) Enable rt-multi-thread feature for sample code [#10770](https://github.com/apache/datafusion/pull/10770) (hsiang-c) + +**Merged pull requests:** + +- Prepare 38.0.0 release candidate 1 [#10407](https://github.com/apache/datafusion/pull/10407) (andygrove) +- Minor: Add more docs and examples for `Expr::unalias` [#10406](https://github.com/apache/datafusion/pull/10406) (alamb) +- minor: Remove [RUST][datafusion] from release vote 
email subject line [#10411](https://github.com/apache/datafusion/pull/10411) (andygrove) +- Remove ScalarFunctionDefinition [#10325](https://github.com/apache/datafusion/pull/10325) (lewiszlw) +- chore(docs): update subquery documentation with more information [#10361](https://github.com/apache/datafusion/pull/10361) (sanderson) +- fix dml logical plan output schema [#10394](https://github.com/apache/datafusion/pull/10394) (leoyvens) +- [MINOR]: Move transpose code to under common [#10409](https://github.com/apache/datafusion/pull/10409) (mustafasrepo) +- minor: Remove docs archive [#10416](https://github.com/apache/datafusion/pull/10416) (andygrove) +- Fix incorrect Schema over aggregate function, Remove unnecessary `exprlist_to_fields_aggregate` [#10408](https://github.com/apache/datafusion/pull/10408) (jonahgao) +- Enable user defined display_name for ScalarUDF [#10417](https://github.com/apache/datafusion/pull/10417) (yyy1000) +- Fix and improve `CommonSubexprEliminate` rule [#10396](https://github.com/apache/datafusion/pull/10396) (peter-toth) +- Simplify making information_schame tables [#10420](https://github.com/apache/datafusion/pull/10420) (lewiszlw) +- only consider main part of the url when deciding is_collection in listing table [#10419](https://github.com/apache/datafusion/pull/10419) (y-f-u) +- make common expression alias human-readable [#10333](https://github.com/apache/datafusion/pull/10333) (MohamedAbdeen21) +- Minor: Simplify + document `EliminateCrossJoin` better [#10427](https://github.com/apache/datafusion/pull/10427) (alamb) +- During expression equality, check for new ordering information [#10434](https://github.com/apache/datafusion/pull/10434) (mustafasrepo) +- Revert 10333 / changes to aliasing in CommonSubExprEliminate [#10436](https://github.com/apache/datafusion/pull/10436) (MohamedAbdeen21) +- Improve flight sql examples [#10432](https://github.com/apache/datafusion/pull/10432) (lewiszlw) +- Move Covariance (Population) covar_pop to 
be a User Defined Aggregate Function [#10418](https://github.com/apache/datafusion/pull/10418) (yyy1000) +- Stop copying LogicalPlan and Exprs in `OptimizeProjections` (2% faster planning) [#10405](https://github.com/apache/datafusion/pull/10405) (alamb) +- Minor: format comments in `PushDownFilter` rule [#10437](https://github.com/apache/datafusion/pull/10437) (alamb) +- chore: Improve release process for next time [#10447](https://github.com/apache/datafusion/pull/10447) (andygrove) +- Minor: Add usecase to comments in `LogicalPlan::recompute_schema` [#10443](https://github.com/apache/datafusion/pull/10443) (alamb) +- doc: fix old master branch references to main [#10458](https://github.com/apache/datafusion/pull/10458) (Jefffrey) +- Move bit_and_or_xor unit tests to slt [#10457](https://github.com/apache/datafusion/pull/10457) (NoeB) +- Introduce user-defined signature [#10439](https://github.com/apache/datafusion/pull/10439) (jayzhan211) +- Remove `AggregateFunctionDefinition::Name` [#10441](https://github.com/apache/datafusion/pull/10441) (lewiszlw) +- Remove some Expr clones in `EliminateCrossJoin`(3%-5% faster planning) [#10430](https://github.com/apache/datafusion/pull/10430) (alamb) +- refactor: Reduce string allocations in Expr::display_name (use write instead of format!) 
[#10454](https://github.com/apache/datafusion/pull/10454) (erratic-pattern) +- Make `CREATE EXTERNAL TABLE` format options consistent, remove special syntax for `HEADER ROW`, `DELIMITER` and `COMPRESSION` [#10404](https://github.com/apache/datafusion/pull/10404) (berkaysynnada) +- Add `simplify` method to aggregate function [#10354](https://github.com/apache/datafusion/pull/10354) (milenkovicm) +- Add cast array test to sqllogictest [#10474](https://github.com/apache/datafusion/pull/10474) (viirya) +- Add `Expr::try_as_col`, deprecate `Expr::try_into_col` (speed up optimizer) [#10448](https://github.com/apache/datafusion/pull/10448) (alamb) +- Implement `From<Arc<LogicalPlan>>` for `LogicalPlanBuilder` [#10466](https://github.com/apache/datafusion/pull/10466) (AbrarNitk) +- Minor: Improve documentation for `catalog.has_header` config option [#10452](https://github.com/apache/datafusion/pull/10452) (alamb) +- Minor: Simplify conjunction and disjunction, improve docs [#10446](https://github.com/apache/datafusion/pull/10446) (alamb) +- Stop copying LogicalPlan and Exprs in `ReplaceDistinctWithAggregate` [#10460](https://github.com/apache/datafusion/pull/10460) (ClSlaid) +- Stop copying LogicalPlan and Exprs in `EliminateCrossJoin` (4% faster planning) [#10431](https://github.com/apache/datafusion/pull/10431) (alamb) +- Improved ergonomy for `CREATE EXTERNAL TABLE OPTIONS`: Don't require quotations for simple namespaced keys like `foo.bar` [#10483](https://github.com/apache/datafusion/pull/10483) (ozankabak) +- feat: allow `array_slice` to take an optional stride parameter [#10469](https://github.com/apache/datafusion/pull/10469) (jonahgao) +- Replace `GetFieldAccess` with indexing function in `SqlToRel ` [#10375](https://github.com/apache/datafusion/pull/10375) (jayzhan211) +- fix: make `columnize_expr` resistant to display_name collisions [#10459](https://github.com/apache/datafusion/pull/10459) (jonahgao) +- Fix values with different data types caused failure 
[#10445](https://github.com/apache/datafusion/pull/10445) (b41sh) +- fix: avoid compressed json files repartitioning [#10470](https://github.com/apache/datafusion/pull/10470) (korowa) +- Minor: Improved document string for `LogicalPlanBuilder` [#10496](https://github.com/apache/datafusion/pull/10496) (AbrarNitk) +- Fix SortMergeJoin with join filter filtering all rows out [#10495](https://github.com/apache/datafusion/pull/10495) (viirya) +- chore: use fullpath in macro to avoid declaring in other module [#10503](https://github.com/apache/datafusion/pull/10503) (jayzhan211) +- Minor: Extend more style of udaf `expr_fn`, Remove order args for`covar_samp` and `covar_pop` [#10492](https://github.com/apache/datafusion/pull/10492) (jayzhan211) +- Minor: remove unused source file `udf.rs` [#10497](https://github.com/apache/datafusion/pull/10497) (jonahgao) +- feat: optional args for regexp\_\* UDFs [#10514](https://github.com/apache/datafusion/pull/10514) (Michael-J-Ward) +- Support UDAF to align Builtin aggregate function [#10493](https://github.com/apache/datafusion/pull/10493) (jayzhan211) +- Remove `file_type()` from `FileFormat` [#10499](https://github.com/apache/datafusion/pull/10499) (Jefffrey) +- Minor: add a test for `current_time` (no args) [#10509](https://github.com/apache/datafusion/pull/10509) (alamb) +- fix: parsing timestamp with date format [#10476](https://github.com/apache/datafusion/pull/10476) (shanretoo) +- [MINOR]: Move pipeline checker rule to the end [#10502](https://github.com/apache/datafusion/pull/10502) (mustafasrepo) +- Minor: Extract parent/child limit calculation into a function, improve docs [#10501](https://github.com/apache/datafusion/pull/10501) (alamb) +- Fix window expr deserialization [#10506](https://github.com/apache/datafusion/pull/10506) (lewiszlw) +- Update substrait requirement from 0.32.0 to 0.33.3 [#10516](https://github.com/apache/datafusion/pull/10516) (dependabot[bot]) +- Stop copying LogicalPlan and Exprs in 
`TypeCoercion` (10% faster planning) [#10356](https://github.com/apache/datafusion/pull/10356) (alamb) +- Implement unparse `IS_NULL` to String and enhance the tests [#10529](https://github.com/apache/datafusion/pull/10529) (goldmedal) +- Fix panic in array_agg(distinct) query [#10526](https://github.com/apache/datafusion/pull/10526) (jayzhan211) +- UDAF: Extend more args to `state_fields` and `groups_accumulator_supported` and introduce `ReversedUDAF` [#10525](https://github.com/apache/datafusion/pull/10525) (jayzhan211) +- Move min_max unit tests to slt [#10539](https://github.com/apache/datafusion/pull/10539) (xinlifoobar) +- Implement unparse `IsNotFalse` to String [#10538](https://github.com/apache/datafusion/pull/10538) (goldmedal) +- Implement Unparse TryCast Expr --> String Support [#10542](https://github.com/apache/datafusion/pull/10542) (xinlifoobar) +- Implement unparse `Placeholder` to String [#10540](https://github.com/apache/datafusion/pull/10540) (reswqa) +- Implement unparse `OuterReferenceColumn` to String [#10544](https://github.com/apache/datafusion/pull/10544) (goldmedal) +- Stop copying LogicalPlan and Exprs in `PushDownFilter` (4%-6% faster planning) [#10444](https://github.com/apache/datafusion/pull/10444) (alamb) +- Stop most copying LogicalPlan and Exprs in `ScalarSubqueryToJoin` [#10489](https://github.com/apache/datafusion/pull/10489) (alamb) +- Example for simple Expr --> SQL conversion [#10528](https://github.com/apache/datafusion/pull/10528) (edmondop) +- fix `null_count` on `compute_record_batch_statistics` to report null counts across partitions [#10468](https://github.com/apache/datafusion/pull/10468) (samuelcolvin) +- fix: `array_slice` panics [#10547](https://github.com/apache/datafusion/pull/10547) (jonahgao) +- Minor: Add `PullUpCorrelatedExpr::new` and improve documentation [#10500](https://github.com/apache/datafusion/pull/10500) (alamb) +- Stop copying LogicalPlan and Exprs in `PushDownLimit` 
[#10508](https://github.com/apache/datafusion/pull/10508) (alamb) +- Break up contributing guide into smaller pages [#10533](https://github.com/apache/datafusion/pull/10533) (alamb) +- PhysicalExpr Orderings with Range Information [#10504](https://github.com/apache/datafusion/pull/10504) (berkaysynnada) +- Implement unparse `ScalarVariable` to String [#10541](https://github.com/apache/datafusion/pull/10541) (reswqa) +- feat: Expose Parquet Schema Adapter [#10515](https://github.com/apache/datafusion/pull/10515) (HawaiianSpork) +- Handle dictionary values in ScalarValue serde [#10563](https://github.com/apache/datafusion/pull/10563) (thinkharderdev) +- Improve signature of `get_field` function [#10569](https://github.com/apache/datafusion/pull/10569) (lewiszlw) +- Implement Unparse `GroupingSet` Expr --> String Support sql [#10555](https://github.com/apache/datafusion/pull/10555) (xinlifoobar) +- Minor: Move proxy to datafusion common [#10561](https://github.com/apache/datafusion/pull/10561) (jayzhan211) +- Update prost-build requirement from =0.12.4 to =0.12.6 [#10578](https://github.com/apache/datafusion/pull/10578) (dependabot[bot]) +- Add examples of how to convert logical plan to/from sql strings [#10558](https://github.com/apache/datafusion/pull/10558) (xinlifoobar) +- feat: API for collecting statistics/index for metadata of a parquet file + tests [#10537](https://github.com/apache/datafusion/pull/10537) (NGA-TRAN) +- Fix: Sort Merge Join LeftSemi issues when JoinFilter is set [#10304](https://github.com/apache/datafusion/pull/10304) (comphead) +- Remove `Expr::GetIndexedField`, replace `Expr::{field,index,range}` with `FieldAccessor`, `IndexAccessor`, and `SliceAccessor` [#10568](https://github.com/apache/datafusion/pull/10568) (jayzhan211) +- Minor: Fix `ArrayFunctionRewriter` name reporting [#10581](https://github.com/apache/datafusion/pull/10581) (alamb) +- Improve `UserDefinedLogicalNode::from_template` API to return `Result` 
[#10575](https://github.com/apache/datafusion/pull/10575) (lewiszlw) +- Migrate testing optimizer rules to use `rewrite` API [#10576](https://github.com/apache/datafusion/pull/10576) (lewiszlw) +- Improve ContextProvider [#10577](https://github.com/apache/datafusion/pull/10577) (lewiszlw) +- test: add more tests for statistics reading [#10592](https://github.com/apache/datafusion/pull/10592) (NGA-TRAN) +- refactor: reduce allocations in push down filter [#10567](https://github.com/apache/datafusion/pull/10567) (erratic-pattern) +- Fix compilation of datafusion-cli on 32bit targets [#10594](https://github.com/apache/datafusion/pull/10594) (nathaniel-daniel) +- Add to_date function to scalar functions doc [#10601](https://github.com/apache/datafusion/pull/10601) (Omega359) +- Rename monotonicity as output_ordering in ScalarUDF's [#10596](https://github.com/apache/datafusion/pull/10596) (berkaysynnada) +- Implement Unparser for `UNION ALL` [#10603](https://github.com/apache/datafusion/pull/10603) (phillipleblanc) +- Improve `UserDefinedLogicalNodeCore::from_template` API to return Result [#10597](https://github.com/apache/datafusion/pull/10597) (lewiszlw) +- Minor: Move group accumulator for aggregate function to physical-expr-common, and add ahash physical-expr-common [#10574](https://github.com/apache/datafusion/pull/10574) (jayzhan211) +- Minor: Consolidate some integration tests into `core_integration` [#10588](https://github.com/apache/datafusion/pull/10588) (alamb) +- Stop copying LogicalPlan and Exprs in `SingleDistinctToGroupBy` [#10527](https://github.com/apache/datafusion/pull/10527) (appletreeisyellow) +- feat: Add eliminate group by constant optimizer rule [#10591](https://github.com/apache/datafusion/pull/10591) (korowa) +- Docs: Update PR workflow documentation [#10532](https://github.com/apache/datafusion/pull/10532) (alamb) +- [MINOR]: Update get range implementation for lead lag window functions 
[#10614](https://github.com/apache/datafusion/pull/10614) (mustafasrepo) +- Minor: Improve documentation in sql_to_plan example [#10582](https://github.com/apache/datafusion/pull/10582) (alamb) +- Docs: add examples for `RuntimeEnv::register_object_store`, improve error messages [#10617](https://github.com/apache/datafusion/pull/10617) (aditanase) +- Add support for Substrait List/EmptyList literals [#10615](https://github.com/apache/datafusion/pull/10615) (Blizzara) +- Add to_unixtime function to scalar functions doc [#10620](https://github.com/apache/datafusion/pull/10620) (Omega359) +- Test for reading read statistics from parquet files without statistics and boolean & struct data type [#10608](https://github.com/apache/datafusion/pull/10608) (NGA-TRAN) +- adding benchmark for extracting arrow statistics from parquet [#10610](https://github.com/apache/datafusion/pull/10610) (Lordworms) +- feat: extend `unnest` to support Struct datatype [#10429](https://github.com/apache/datafusion/pull/10429) (duongcongtoai) +- Implement a dialect-specific rule for unparsing an identifier with or without quotes [#10573](https://github.com/apache/datafusion/pull/10573) (goldmedal) +- add catalog as part of the table path in plan_to_sql [#10612](https://github.com/apache/datafusion/pull/10612) (y-f-u) +- Refactor parquet row group pruning into a struct (use new statistics API, part 1) [#10607](https://github.com/apache/datafusion/pull/10607) (alamb) +- Extract `Date32` parquet statistics as `Date32Array` rather than `Int32Array` [#10593](https://github.com/apache/datafusion/pull/10593) (xinlifoobar) +- Omit NULLS FIRST/LAST when unparsing ORDER BY clauses for MySQL [#10625](https://github.com/apache/datafusion/pull/10625) (phillipleblanc) +- Fix broken build/test from merge [#10637](https://github.com/apache/datafusion/pull/10637) (phillipleblanc) +- Add SessionContext::register_object_store [#10621](https://github.com/apache/datafusion/pull/10621) (alamb) +- Minor: Move median 
test [#10611](https://github.com/apache/datafusion/pull/10611) (jayzhan211) +- Add support for Substrait Struct literals and type [#10622](https://github.com/apache/datafusion/pull/10622) (Blizzara) +- fix Incorrect statistics read for i8 i16 columns in parquet [#10629](https://github.com/apache/datafusion/pull/10629) (Lordworms) +- Minor: add runtime asserts to `RowGroup` [#10641](https://github.com/apache/datafusion/pull/10641) (alamb) +- Update cli Dockerfile to a newer ubuntu release, newer rust release [#10638](https://github.com/apache/datafusion/pull/10638) (Omega359) +- More properly handle nullability of types/literals in Substrait [#10640](https://github.com/apache/datafusion/pull/10640) (Blizzara) +- fix wrong type validation on unnest expr [#10657](https://github.com/apache/datafusion/pull/10657) (duongcongtoai) +- Fix incorrect statistics read for binary columns in parquet [#10645](https://github.com/apache/datafusion/pull/10645) (xinlifoobar) +- Fix `NULL["field"]` for expr_API [#10655](https://github.com/apache/datafusion/pull/10655) (alamb) +- Update substrait requirement from 0.33.3 to 0.34.0 [#10632](https://github.com/apache/datafusion/pull/10632) (dependabot[bot]) +- Fix typo in Cargo.toml (unused manifest key: dependencies.regex.worksapce) [#10662](https://github.com/apache/datafusion/pull/10662) (alamb) +- Add `FileScanConfig::new()` API [#10623](https://github.com/apache/datafusion/pull/10623) (alamb) +- Minor: Remove `GetFieldAccessSchema` [#10665](https://github.com/apache/datafusion/pull/10665) (jayzhan211) +- Minor: Use slice in `ConcreteTreeNode` [#10666](https://github.com/apache/datafusion/pull/10666) (peter-toth) +- Move Median to `functions-aggregate` and Introduce Numeric signature [#10644](https://github.com/apache/datafusion/pull/10644) (jayzhan211) +- Fix `Coalesce` casting logic to follows what Postgres and DuckDB do. 
Introduce signature that do non-comparison coercion [#10268](https://github.com/apache/datafusion/pull/10268) (jayzhan211) +- fix: pass `quote` parameter to CSV writer [#10671](https://github.com/apache/datafusion/pull/10671) (DDtKey) +- Fix compilation "comparison_binary_numeric_coercion not found" [#10677](https://github.com/apache/datafusion/pull/10677) (alamb) +- refactor: simplify converting List DataTypes to `ScalarValue` [#10675](https://github.com/apache/datafusion/pull/10675) (jonahgao) +- feat: add substrait support for Interval types and literals [#10646](https://github.com/apache/datafusion/pull/10646) (waynexia) +- Minor: Improve ObjectStoreUrl docs + examples [#10619](https://github.com/apache/datafusion/pull/10619) (alamb) +- fix: CI compilation failed on substrait [#10683](https://github.com/apache/datafusion/pull/10683) (jonahgao) +- Add tests for reading numeric limits in parquet statistics [#10642](https://github.com/apache/datafusion/pull/10642) (alamb) +- Update nix requirement from 0.28.0 to 0.29.0 [#10684](https://github.com/apache/datafusion/pull/10684) (dependabot[bot]) +- refactor: Move SchemaAdapter from parquet module to data source [#10680](https://github.com/apache/datafusion/pull/10680) (HawaiianSpork) +- Add reference visitor `TreeNode` APIs, change `ExecutionPlan::children()` and `PhysicalExpr::children()` return references [#10543](https://github.com/apache/datafusion/pull/10543) (peter-toth) +- Convert first, last aggregate function to UDAF [#10648](https://github.com/apache/datafusion/pull/10648) (mustafasrepo) +- Minor: CastExpr Ordering Handle [#10650](https://github.com/apache/datafusion/pull/10650) (berkaysynnada) +- Factor out common datafusion types into another proto file [#10649](https://github.com/apache/datafusion/pull/10649) (mustafasrepo) +- Minor: Add tests showing aggregate behavior for NaNs [#10634](https://github.com/apache/datafusion/pull/10634) (alamb) +- Improve `ParquetExec` and related documentation 
[#10647](https://github.com/apache/datafusion/pull/10647) (alamb) +- minor: inconsistent group by position planning [#10679](https://github.com/apache/datafusion/pull/10679) (korowa) +- Remove duplicate function name in its aliases list [#10661](https://github.com/apache/datafusion/pull/10661) (goldmedal) +- Add protobuf serde support for `LogicalPlan::Unnest` [#10681](https://github.com/apache/datafusion/pull/10681) (akoshchiy) +- Support Substrait's VirtualTables [#10531](https://github.com/apache/datafusion/pull/10531) (Blizzara) +- support serialization and deserialization limit in the aggregation exec [#10692](https://github.com/apache/datafusion/pull/10692) (liukun4515) +- Display date32/64 in YYYY-MM-DD format [#10691](https://github.com/apache/datafusion/pull/10691) (houqp) +- Fix: array list values are leaked on nested `unnest` operators [#10689](https://github.com/apache/datafusion/pull/10689) (duongcongtoai) +- Support LogicalPlan::Distinct in unparser [#10690](https://github.com/apache/datafusion/pull/10690) (yyy1000) +- Remove redundant upper case aliases for `median`, `first_value` and `last_value` [#10696](https://github.com/apache/datafusion/pull/10696) (goldmedal) +- Minor: improve Expr documentation [#10685](https://github.com/apache/datafusion/pull/10685) (alamb) +- chore: align re-exports in functions-aggregate [#10705](https://github.com/apache/datafusion/pull/10705) (waynexia) +- Fix typo in bench.sh [#10698](https://github.com/apache/datafusion/pull/10698) (vimt) +- Fix incorrect statistics read for unsigned integers columns in parquet [#10704](https://github.com/apache/datafusion/pull/10704) (xinlifoobar) +- Separate `Partitioning` protobuf serialization code [#10708](https://github.com/apache/datafusion/pull/10708) (lewiszlw) +- Support consuming Substrait with compound signature function names [#10653](https://github.com/apache/datafusion/pull/10653) (Blizzara) +- Minor: Add examples of using TreeNode with `Expr` 
[#10686](https://github.com/apache/datafusion/pull/10686) (alamb) +- Minor: Add examples of using TreeNode with `LogicalPlan` [#10687](https://github.com/apache/datafusion/pull/10687) (alamb) +- Add `ParquetExec::builder()`, deprecate `ParquetExec::new` [#10636](https://github.com/apache/datafusion/pull/10636) (alamb) +- feature: Add a WindowUDFImpl::simplify() API [#9906](https://github.com/apache/datafusion/pull/9906) (guojidan) +- Chore: clean up udwf example && remove redundant import [#10718](https://github.com/apache/datafusion/pull/10718) (guojidan) +- Push down filter as table partition list prefix [#10693](https://github.com/apache/datafusion/pull/10693) (houqp) +- Make swap_hash_join public API [#10702](https://github.com/apache/datafusion/pull/10702) (viirya) +- ci: fix clippy error on main [#10723](https://github.com/apache/datafusion/pull/10723) (jonahgao) +- CI: Fix complaints from newer Clippy versions [#10725](https://github.com/apache/datafusion/pull/10725) (comphead) +- Remove Eager Trait for Joins [#10721](https://github.com/apache/datafusion/pull/10721) (berkaysynnada) +- Minor: fix signature `fn octect_length()` [#10726](https://github.com/apache/datafusion/pull/10726) (marvinlanhenke) +- docs: add documents to substrait type variation consts [#10719](https://github.com/apache/datafusion/pull/10719) (waynexia) +- Update rstest requirement from 0.19.0 to 0.20.0 [#10734](https://github.com/apache/datafusion/pull/10734) (dependabot[bot]) +- Update rstest_reuse requirement from 0.6.0 to 0.7.0 [#10733](https://github.com/apache/datafusion/pull/10733) (dependabot[bot]) +- Add example for building an external secondary index for parquet files [#10549](https://github.com/apache/datafusion/pull/10549) (alamb) +- Minor: move stddev test to slt [#10741](https://github.com/apache/datafusion/pull/10741) (marvinlanhenke) +- fix(CLI): can not create external tables with format options [#10739](https://github.com/apache/datafusion/pull/10739) (jonahgao) +- Add 
support for `AggregateExpr`, `WindowExpr` rewrite. [#10742](https://github.com/apache/datafusion/pull/10742) (mustafasrepo) +- Fix SMJ Left Anti Join when the join filter is set [#10724](https://github.com/apache/datafusion/pull/10724) (comphead) +- Introduce FunctionRegistry dependency to optimize and rewrite rule [#10714](https://github.com/apache/datafusion/pull/10714) (jayzhan211) +- Minor: Add SMJ to TPCH benchmark usage [#10747](https://github.com/apache/datafusion/pull/10747) (comphead) +- Minor: Split physical_plan/parquet/mod.rs into smaller modules [#10727](https://github.com/apache/datafusion/pull/10727) (alamb) +- minor: consolidate unparser integration tests [#10736](https://github.com/apache/datafusion/pull/10736) (devinjdangelo) +- Minor: Move aggregate variance to slt [#10750](https://github.com/apache/datafusion/pull/10750) (marvinlanhenke) +- fix: fix string repeat for negative numbers [#10760](https://github.com/apache/datafusion/pull/10760) (tshauck) +- Introduce Sum UDAF [#10651](https://github.com/apache/datafusion/pull/10651) (jayzhan211) +- Extract parquet statistics from timestamps with timezones [#10766](https://github.com/apache/datafusion/pull/10766) (xinlifoobar) +- Minor: Add tests for extracting dictionary parquet statistics [#10729](https://github.com/apache/datafusion/pull/10729) (alamb) +- Update rstest requirement from 0.20.0 to 0.21.0 [#10774](https://github.com/apache/datafusion/pull/10774) (dependabot[bot]) +- Minor: Refactor memory size estimation for HashTable [#10748](https://github.com/apache/datafusion/pull/10748) (marvinlanhenke) +- Reduce code repetition in `datafusion/functions` mod files [#10700](https://github.com/apache/datafusion/pull/10700) (MohamedAbdeen21) +- Minor: (Doc) Enable rt-multi-thread feature for sample code [#10770](https://github.com/apache/datafusion/pull/10770) (hsiang-c) +- Support negatives in split part [#10780](https://github.com/apache/datafusion/pull/10780) (tshauck) +- feat: support unparsing 
LogicalPlan::Window nodes [#10767](https://github.com/apache/datafusion/pull/10767) (devinjdangelo) +- Extract parquet statistics from `LargeUtf8` columns and Add tests for `UTF8` And `LargeUTF8` [#10762](https://github.com/apache/datafusion/pull/10762) (Weijun-H) +- Cleanup GetIndexedField [#10769](https://github.com/apache/datafusion/pull/10769) (lewiszlw) +- Extract parquet statistics from f16 columns, add `ScalarValue::Float16` [#10763](https://github.com/apache/datafusion/pull/10763) (Lordworms) +- Handle empty rows for `array_sort` [#10786](https://github.com/apache/datafusion/pull/10786) (jayzhan211) +- Fix extract parquet statistics from LargeBinary columns [#10775](https://github.com/apache/datafusion/pull/10775) (xinlifoobar) +- Extract parquet statistics from Time32 and Time64 columns [#10771](https://github.com/apache/datafusion/pull/10771) (Lordworms) +- chore: fix `last_value` coercion [#10783](https://github.com/apache/datafusion/pull/10783) (appletreeisyellow) +- Fix extract parquet statistics from Decimal256 columns [#10777](https://github.com/apache/datafusion/pull/10777) (xinlifoobar) +- Speed up arrow_statistics test [#10735](https://github.com/apache/datafusion/pull/10735) (alamb) +- minor: Refactor some unparser methods to improve readability [#10788](https://github.com/apache/datafusion/pull/10788) (devinjdangelo) +- Convert variance sample to udaf [#10713](https://github.com/apache/datafusion/pull/10713) (yyin-dev) +- Improve docs and fix a typo [#10798](https://github.com/apache/datafusion/pull/10798) (lewiszlw) +- fix: `array_slice` and `array_element` panicked on empty args [#10804](https://github.com/apache/datafusion/pull/10804) (jonahgao) +- Avoid the usage of intermediate ScalarValue to improve performance of extracting statistics from parquet files [#10711](https://github.com/apache/datafusion/pull/10711) (xinlifoobar) +- SMJ: Add more tests and improve comments [#10784](https://github.com/apache/datafusion/pull/10784) (comphead) +- 
feat: Update Parquet row filtering to handle type coercion [#10716](https://github.com/apache/datafusion/pull/10716) (jeffreyssmith2nd) +- Handle EmptyRelation during SQL unparsing [#10803](https://github.com/apache/datafusion/pull/10803) (goldmedal) +- Document Committer and PMC process [#10778](https://github.com/apache/datafusion/pull/10778) (alamb) +- Int64 as default type for make_array function empty or null case [#10790](https://github.com/apache/datafusion/pull/10790) (jayzhan211) +- Split `SessionState` into its own module [#10794](https://github.com/apache/datafusion/pull/10794) (alamb) +- Add `StreamProvider` for configuring `StreamTable` [#10600](https://github.com/apache/datafusion/pull/10600) (matthewmturner) +- Bench: Add `PREFER_HASH_JOIN` env variable [#10809](https://github.com/apache/datafusion/pull/10809) (comphead) +- Add `ParquetAccessPlan`, unify RowGroup selection and PagePruning selection [#10738](https://github.com/apache/datafusion/pull/10738) (alamb) +- Fix `ScalarUDFImpl::propagate_constraints` doc [#10810](https://github.com/apache/datafusion/pull/10810) (lewiszlw) +- Extract Parquet statistics from `Interval` column [#10801](https://github.com/apache/datafusion/pull/10801) (marvinlanhenke) +- build(deps): upgrade sqlparser to 0.47.0 [#10392](https://github.com/apache/datafusion/pull/10392) (tisonkun) +- Refactor and simplify the SQL unparser [#10811](https://github.com/apache/datafusion/pull/10811) (goldmedal) +- Minor: Remove code duplication in `memory_limit` derivation for datafusion-cli [#10814](https://github.com/apache/datafusion/pull/10814) (comphead) +- build(deps): update Arrow/Parquet to `52.0`, object-store to `0.10` [#10765](https://github.com/apache/datafusion/pull/10765) (waynexia) diff --git a/dev/release/README.md b/dev/release/README.md index 3b997da89cfe..749af8696b0f 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -96,7 +96,7 @@ pip3 install PyGitHub Run the following command to generate the 
changelog content. ```bash -$ GITHUB_TOKEN= ./dev/release/generate-changelog.py apache/datafusion 24.0.0 HEAD > dev/changelog/25.0.0.md +$ GITHUB_TOKEN= ./dev/release/generate-changelog.py 24.0.0 HEAD > dev/changelog/25.0.0.md ``` This script creates a changelog from GitHub PRs based on the labels associated with them as well as looking for @@ -112,7 +112,6 @@ Generating changelog content This process is not fully automated, so there are some additional manual steps: - Add the ASF header to the generated file -- Add a link to this changelog from the top-level `/datafusion/CHANGELOG.md` - Add the following content (copy from the previous version's changelog and update as appropriate): ``` diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 74e77ce846e5..424baece6023 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -103,16 +103,16 @@ def cli(args=None): args = sys.argv[1:] parser = argparse.ArgumentParser() - parser.add_argument("project", help="The project name e.g. apache/datafusion") - parser.add_argument("tag1", help="The previous release tag") - parser.add_argument("tag2", help="The current release tag") + parser.add_argument("tag1", help="The previous release tag (e.g. 38.0.0)") + parser.add_argument("tag2", help="The current release tag (e.g.
HEAD)") args = parser.parse_args() token = os.getenv("GITHUB_TOKEN") + project = "apache/datafusion" g = Github(token) - repo = g.get_repo(args.project) - generate_changelog(repo, args.project, args.tag1, args.tag2) + repo = g.get_repo(project) + generate_changelog(repo, project, args.tag1, args.tag2) if __name__ == "__main__": cli() \ No newline at end of file diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 0cfd81eff75a..80d88632ffdb 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -64,7 +64,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | NULL | Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | NULL | Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 38.0.0 | Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 39.0.0 | Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | NULL | Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 18446744073709551615 | Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | Sets default encoding for any column Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. 
These values are not case sensitive. If NULL, uses default parquet writer setting |