
fix: Make cast from float/double to decimal compatible with Spark #1915

Draft · wants to merge 9 commits into base: main
5 changes: 4 additions & 1 deletion NOTICE.txt
@@ -10,4 +10,7 @@ Specifically:
- Optimizer rule to replace SortMergeJoin with ShuffleHashJoin

This product includes software developed at
DataFusion HDFS ObjectStore Contrib Package(https://github.com/datafusion-contrib/datafusion-objectstore-hdfs)
DataFusion HDFS ObjectStore Contrib Package(https://github.com/datafusion-contrib/datafusion-objectstore-hdfs)

This product includes software developed at
dragonbox (https://github.com/dtolnay/dragonbox)
4 changes: 2 additions & 2 deletions docs/source/user-guide/compatibility.md
@@ -180,13 +180,15 @@ The following cast operations are generally compatible with Spark except for the
| float | integer | |
| float | long | |
| float | double | |
| float | decimal | |
| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
| double | boolean | |
| double | byte | |
| double | short | |
| double | integer | |
| double | long | |
| double | float | |
| double | decimal | |
| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
| decimal | byte | |
| decimal | short | |
@@ -216,8 +218,6 @@ The following cast operations are not compatible with Spark for all inputs and a
|-|-|-|
| integer | decimal | No overflow check |
| long | decimal | No overflow check |
| float | decimal | There can be rounding differences |
| double | decimal | There can be rounding differences |
| string | float | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
| string | double | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
| string | decimal | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. Returns 0.0 instead of null if input contains no digits |
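The two hunks above move the `float` → `decimal` and `double` → `decimal` casts from the incompatible list (previously annotated with "There can be rounding differences") into the compatible list. As a rough illustration of the semantics being matched, and not the code added in this PR: Spark casts a double to decimal via `java.math.BigDecimal.valueOf`, which first renders the shortest round-trip decimal string and then rounds HALF_UP to the target scale. The sketch below, assuming that behaviour and using only the Rust standard library, shows why the string-based route can differ from rounding the exact binary value; the helper name `f64_to_unscaled_decimal` is hypothetical and precision/overflow checks are omitted.

```rust
fn f64_to_unscaled_decimal(value: f64, scale: u32) -> Option<i128> {
    if !value.is_finite() {
        return None;
    }
    // Rust's `{}` formatting, like dragonbox, emits the shortest decimal string
    // that parses back to the same f64 (and never uses exponent notation).
    let s = format!("{value}");
    let (int_part, frac_part) = s.split_once('.').unwrap_or((s.as_str(), ""));
    let negative = int_part.starts_with('-');
    // Keep scale + 1 fractional digits so HALF_UP can be applied to the extra one.
    let keep = scale as usize + 1;
    let mut frac: String = frac_part.chars().take(keep).collect();
    while frac.len() < keep {
        frac.push('0');
    }
    let digits = format!("{}{}", int_part.trim_start_matches('-'), frac);
    let magnitude: i128 = digits.parse().ok()?;
    let rounded = (magnitude + 5) / 10; // HALF_UP on the extra digit
    Some(if negative { -rounded } else { rounded })
}

fn main() {
    // The f64 nearest to 2.675 is 2.67499999999999982..., so rounding the exact
    // binary value at scale 2 would give 2.67; the string-based route matches
    // Spark and yields 2.68 (unscaled value 268).
    assert_eq!(f64_to_unscaled_decimal(2.675, 2), Some(268));
    assert_eq!(f64_to_unscaled_decimal(-2.675, 2), Some(-268));
}
```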
5 changes: 5 additions & 0 deletions native/Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions native/Cargo.toml
@@ -16,8 +16,8 @@
# under the License.

[workspace]
default-members = ["core", "spark-expr", "proto"]
members = ["core", "spark-expr", "proto", "hdfs"]
default-members = ["core", "spark-expr", "proto", "dragonbox"]
members = ["core", "spark-expr", "proto", "hdfs", "dragonbox"]
resolver = "2"

[workspace.package]
35 changes: 35 additions & 0 deletions native/dragonbox/Cargo.toml
@@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "datafusion-comet-dragonbox"
description = "Comet dragonbox integration"
version = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }
authors = { workspace = true }
readme = { workspace = true }
license = { workspace = true }
edition = { workspace = true }

[package.metadata.docs.rs]
rustdoc-args = [
    "--generate-link-to-definition",
    "--extern-html-root-url=core=https://doc.rust-lang.org",
    "--extern-html-root-url=alloc=https://doc.rust-lang.org",
    "--extern-html-root-url=std=https://doc.rust-lang.org",
]
25 changes: 25 additions & 0 deletions native/dragonbox/README.md
@@ -0,0 +1,25 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Apache DataFusion Comet: dragonbox integration

This crate contains the dragonbox integration and is intended to be used as part of the Apache DataFusion Comet project.

Binary-to-decimal floating-point conversion is powered by [dragonbox](https://github.com/dtolnay/dragonbox).
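As orientation for the `src/buffer.rs` file added below, here is a hypothetical usage sketch. It assumes the crate re-exports `Buffer` from its root (the `Buffer` and `Float` definitions live outside this diff) and relies only on the `format` method shown in the diff:

```rust
// Hypothetical usage; assumes `Buffer` is re-exported at the crate root.
use datafusion_comet_dragonbox::Buffer;

fn main() {
    let mut buffer = Buffer::new();

    // Shortest decimal representation that parses back to the same f64.
    let printed = buffer.format(1.1_f64);
    println!("{printed}");

    // Non-finite values follow std::fmt conventions.
    assert_eq!(buffer.format(f64::NAN), "NaN");
    assert_eq!(buffer.format(f64::NEG_INFINITY), "-inf");
}
```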
133 changes: 133 additions & 0 deletions native/dragonbox/src/buffer.rs
@@ -0,0 +1,133 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::{Buffer, Float};
use core::mem::MaybeUninit;
use core::slice;
use core::str;

impl Buffer {
    /// This is a cheap operation; you don't need to worry about reusing buffers
    /// for efficiency.
    #[inline]
    pub fn new() -> Self {
        let bytes = [MaybeUninit::<u8>::uninit(); 24];
        Buffer { bytes }
    }

    /// Print a floating point number into this buffer and return a reference to
    /// its string representation within the buffer.
    ///
    /// # Special cases
    ///
    /// This function formats NaN as the string "NaN", positive infinity as
    /// "inf", and negative infinity as "-inf" to match std::fmt.
    ///
    /// If your input is known to be finite, you may get better performance by
    /// calling the `format_finite` method instead of `format` to avoid the
    /// checks for special cases.
    pub fn format<F: Float>(&mut self, f: F) -> &str {
        if f.is_nonfinite() {
            f.format_nonfinite()
        } else {
            self.format_finite(f)
        }
    }

    /// Print a floating point number into this buffer and return a reference to
    /// its string representation within the buffer.
    ///
    /// # Special cases
    ///
    /// This function **does not** check for NaN or infinity. If the input
    /// number is not a finite float, the printed representation will be some
    /// correctly formatted but unspecified numerical value.
    ///
    /// Please check [`is_finite`] yourself before calling this function, or
    /// check [`is_nan`] and [`is_infinite`] and handle those cases yourself.
    ///
    /// [`is_finite`]: f64::is_finite
    /// [`is_nan`]: f64::is_nan
    /// [`is_infinite`]: f64::is_infinite
    #[inline]
    pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
        unsafe {
            let n = f.write_to_dragonbox_buffer(self.bytes.as_mut_ptr() as *mut u8);
            debug_assert!(n <= self.bytes.len());
            let slice = slice::from_raw_parts(self.bytes.as_ptr() as *const u8, n);
            str::from_utf8_unchecked(slice)
        }
    }
}

impl Copy for Buffer {}

impl Clone for Buffer {
    #[inline]
    #[allow(clippy::non_canonical_clone_impl)] // false positive https://github.com/rust-lang/rust-clippy/issues/11072
    fn clone(&self) -> Self {
        Buffer::new()
    }
}

impl Default for Buffer {
    #[inline]
    fn default() -> Self {
        Buffer::new()
    }
}

impl Float for f64 {}

const NAN: &str = "NaN";
const INFINITY: &str = "inf";
const NEG_INFINITY: &str = "-inf";

pub trait Sealed: Copy {
    fn is_nonfinite(self) -> bool;
    fn format_nonfinite(self) -> &'static str;
    unsafe fn write_to_dragonbox_buffer(self, result: *mut u8) -> usize;
}

impl Sealed for f64 {
    #[inline]
    fn is_nonfinite(self) -> bool {
        const EXP_MASK: u64 = 0x7ff0000000000000;
        let bits = self.to_bits();
        bits & EXP_MASK == EXP_MASK
    }

    #[cold]
    fn format_nonfinite(self) -> &'static str {
        const MANTISSA_MASK: u64 = 0x000fffffffffffff;
        const SIGN_MASK: u64 = 0x8000000000000000;
        let bits = self.to_bits();
        if bits & MANTISSA_MASK != 0 {
            NAN
        } else if bits & SIGN_MASK != 0 {
            NEG_INFINITY
        } else {
            INFINITY
        }
    }

    #[inline]
    unsafe fn write_to_dragonbox_buffer(self, buffer: *mut u8) -> usize {
        let end = crate::to_chars::to_chars(self, buffer);
        end.offset_from(buffer) as usize
    }
}
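A side note on the bit-level checks in `Sealed for f64` above: the following standalone sketch (not part of this PR) demonstrates that the `EXP_MASK`, `MANTISSA_MASK`, and `SIGN_MASK` tests agree with the standard library's float classification methods.

```rust
// Standalone illustration; mirrors the masks used in is_nonfinite/format_nonfinite.
fn main() {
    const EXP_MASK: u64 = 0x7ff0000000000000;
    const MANTISSA_MASK: u64 = 0x000fffffffffffff;
    const SIGN_MASK: u64 = 0x8000000000000000;

    for v in [0.0_f64, -1.5, f64::MAX, f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
        let bits = v.to_bits();
        // Exponent all ones <=> NaN or infinity.
        assert_eq!(bits & EXP_MASK == EXP_MASK, !v.is_finite());
        if !v.is_finite() {
            // A non-zero mantissa distinguishes NaN from the infinities.
            assert_eq!(bits & MANTISSA_MASK != 0, v.is_nan());
            // The sign bit distinguishes -inf from +inf.
            if v.is_infinite() {
                assert_eq!(bits & SIGN_MASK != 0, v.is_sign_negative());
            }
        }
    }
    println!("bit masks agree with std classification");
}
```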