Skip to content

Commit c74a27a

Browse files
ZENOTME and ZENOTME authored
fix: fix float compare order (#1416)
## Which issue does this PR close?

Closes #1415

## What changes are included in this PR?

## Are these changes tested?

Co-authored-by: ZENOTME <st810918843@gmail.com>
1 parent bddffa1 commit c74a27a

File tree

1 file changed

+101
-3
lines changed

1 file changed

+101
-3
lines changed

crates/iceberg/src/spec/values.rs

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121

2222
use std::any::Any;
23+
use std::cmp::Ordering;
2324
use std::collections::HashMap;
2425
use std::fmt::{Display, Formatter};
2526
use std::hash::Hash;
@@ -29,7 +30,7 @@ use std::str::FromStr;
2930
pub use _serde::RawLiteral;
3031
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
3132
use num_bigint::BigInt;
32-
use ordered_float::OrderedFloat;
33+
use ordered_float::{Float, OrderedFloat};
3334
use rust_decimal::Decimal;
3435
use rust_decimal::prelude::ToPrimitive;
3536
use serde::de::{
@@ -214,6 +215,36 @@ impl<'de> Deserialize<'de> for Datum {
214215
}
215216
}
216217

218+
// Compare following iceberg float ordering rules:
219+
// -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN
220+
fn iceberg_float_cmp<T: Float>(a: T, b: T) -> Option<Ordering> {
221+
if a.is_nan() && b.is_nan() {
222+
return match (a.is_sign_negative(), b.is_sign_negative()) {
223+
(true, false) => Some(Ordering::Less),
224+
(false, true) => Some(Ordering::Greater),
225+
_ => Some(Ordering::Equal),
226+
};
227+
}
228+
229+
if a.is_nan() {
230+
return Some(if a.is_sign_negative() {
231+
Ordering::Less
232+
} else {
233+
Ordering::Greater
234+
});
235+
}
236+
237+
if b.is_nan() {
238+
return Some(if b.is_sign_negative() {
239+
Ordering::Greater
240+
} else {
241+
Ordering::Less
242+
});
243+
}
244+
245+
a.partial_cmp(&b)
246+
}
247+
217248
impl PartialOrd for Datum {
218249
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
219250
match (&self.literal, &other.literal, &self.r#type, &other.r#type) {
@@ -241,13 +272,13 @@ impl PartialOrd for Datum {
241272
PrimitiveLiteral::Float(other_val),
242273
PrimitiveType::Float,
243274
PrimitiveType::Float,
244-
) => val.partial_cmp(other_val),
275+
) => iceberg_float_cmp(*val, *other_val),
245276
(
246277
PrimitiveLiteral::Double(val),
247278
PrimitiveLiteral::Double(other_val),
248279
PrimitiveType::Double,
249280
PrimitiveType::Double,
250-
) => val.partial_cmp(other_val),
281+
) => iceberg_float_cmp(*val, *other_val),
251282
(
252283
PrimitiveLiteral::Int(val),
253284
PrimitiveLiteral::Int(other_val),
@@ -3845,4 +3876,71 @@ mod tests {
38453876

38463877
assert_eq!(result, expected);
38473878
}
3879+
3880+
#[test]
fn test_iceberg_float_order() {
    // Sorting through `Datum::partial_cmp` is expected to yield
    // -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN.
    //
    // NOTE(review): the final `assert_eq!` is only as strict as `Datum`'s
    // `PartialEq`; confirm it distinguishes -NaN from NaN and -0.0 from 0.0,
    // otherwise those positions are not actually verified here.

    // Single-precision values, deliberately shuffled.
    let mut floats = vec![
        Datum::float(f32::NAN),
        Datum::float(-f32::NAN),
        Datum::float(f32::MAX),
        Datum::float(f32::MIN),
        Datum::float(f32::INFINITY),
        Datum::float(-f32::INFINITY),
        Datum::float(1.0),
        Datum::float(-1.0),
        Datum::float(0.0),
        Datum::float(-0.0),
    ];
    // `unwrap` doubles as a check that every pair is comparable.
    floats.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());

    let floats_in_order = vec![
        Datum::float(-f32::NAN),
        Datum::float(-f32::INFINITY),
        Datum::float(f32::MIN),
        Datum::float(-1.0),
        Datum::float(-0.0),
        Datum::float(0.0),
        Datum::float(1.0),
        Datum::float(f32::MAX),
        Datum::float(f32::INFINITY),
        Datum::float(f32::NAN),
    ];
    assert_eq!(floats, floats_in_order);

    // Double-precision values, deliberately shuffled.
    let mut doubles = vec![
        Datum::double(f64::NAN),
        Datum::double(-f64::NAN),
        Datum::double(f64::INFINITY),
        Datum::double(-f64::INFINITY),
        Datum::double(f64::MAX),
        Datum::double(f64::MIN),
        Datum::double(1.0),
        Datum::double(-1.0),
        Datum::double(0.0),
        Datum::double(-0.0),
    ];
    doubles.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());

    let doubles_in_order = vec![
        Datum::double(-f64::NAN),
        Datum::double(-f64::INFINITY),
        Datum::double(f64::MIN),
        Datum::double(-1.0),
        Datum::double(-0.0),
        Datum::double(0.0),
        Datum::double(1.0),
        Datum::double(f64::MAX),
        Datum::double(f64::INFINITY),
        Datum::double(f64::NAN),
    ];
    assert_eq!(doubles, doubles_in_order);
}
38483946
}

0 commit comments

Comments
 (0)