|
| 1 | +use std::io::{self, Write}; |
| 2 | +use std::num::NonZeroU64; |
| 3 | + |
| 4 | +use common::BinarySerializable; |
| 5 | +use fastdivide::DividerU64; |
| 6 | + |
/// Parameters of a GCD-encoded column.
///
/// The struct is plain data: three `u64` values that the `BinarySerializable`
/// implementation writes and reads back in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GCDParams {
    /// Presumably the greatest common divisor shared by the encoded values
    /// (NOTE(review): confirm against the code that fills this struct).
    pub gcd: u64,
    /// Presumably the smallest value of the column (NOTE(review): confirm).
    pub min_value: u64,
    /// Presumably the number of values in the column (NOTE(review): confirm).
    pub num_vals: u64,
}
| 13 | + |
| 14 | +impl BinarySerializable for GCDParams { |
| 15 | + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { |
| 16 | + self.gcd.serialize(writer)?; |
| 17 | + self.min_value.serialize(writer)?; |
| 18 | + self.num_vals.serialize(writer)?; |
| 19 | + Ok(()) |
| 20 | + } |
| 21 | + |
| 22 | + fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> { |
| 23 | + let gcd: u64 = u64::deserialize(reader)?; |
| 24 | + let min_value: u64 = u64::deserialize(reader)?; |
| 25 | + let num_vals: u64 = u64::deserialize(reader)?; |
| 26 | + Ok(Self { |
| 27 | + gcd, |
| 28 | + min_value, |
| 29 | + num_vals, |
| 30 | + }) |
| 31 | + } |
| 32 | +} |
| 33 | + |
/// Computes the greatest common divisor of two non-zero numbers using the
/// Euclidean algorithm.
///
/// Passing `large >= small` is recommended but not required: if the arguments
/// are the other way round, the first iteration simply swaps them.
fn compute_gcd(mut large: NonZeroU64, mut small: NonZeroU64) -> NonZeroU64 {
    // While the remainder is non-zero, shift the pair down:
    // (large, small) <- (small, large % small).
    while let Some(remainder) = NonZeroU64::new(large.get() % small) {
        large = small;
        small = remainder;
    }
    // A zero remainder means `small` divides `large`, so `small` is the gcd.
    small
}
| 47 | + |
| 48 | +// Find GCD for iterator of numbers |
| 49 | +pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<NonZeroU64> { |
| 50 | + let mut numbers = numbers.flat_map(NonZeroU64::new); |
| 51 | + let mut gcd: NonZeroU64 = numbers.next()?; |
| 52 | + if gcd.get() == 1 { |
| 53 | + return Some(gcd); |
| 54 | + } |
| 55 | + |
| 56 | + let mut gcd_divider = DividerU64::divide_by(gcd.get()); |
| 57 | + for val in numbers { |
| 58 | + let remainder = val.get() - (gcd_divider.divide(val.get())) * gcd.get(); |
| 59 | + if remainder == 0 { |
| 60 | + continue; |
| 61 | + } |
| 62 | + gcd = compute_gcd(val, gcd); |
| 63 | + if gcd.get() == 1 { |
| 64 | + return Some(gcd); |
| 65 | + } |
| 66 | + |
| 67 | + gcd_divider = DividerU64::divide_by(gcd.get()); |
| 68 | + } |
| 69 | + Some(gcd) |
| 70 | +} |
| 71 | + |
#[cfg(test)]
mod tests {
    use std::io;
    use std::num::NonZeroU64;

    use ownedbytes::OwnedBytes;

    use crate::gcd::{compute_gcd, find_gcd};
    use crate::{FastFieldCodecType, VecColumn};

    /// Serializes `num_vals` multiples of 1000 (starting at -4000) with
    /// `codec_type` plus the Gcd codec, checks the values read back, then
    /// checks that the same codec without Gcd, on data whose last value is
    /// replaced by 1001, produces a larger buffer.
    fn test_fastfield_gcd_i64_with_codec(
        codec_type: FastFieldCodecType,
        num_vals: usize,
    ) -> io::Result<()> {
        let last_step = (num_vals as i64) - 5;
        let mut vals: Vec<i64> = (-4..=last_step).map(|step| step * 1000).collect();

        let mut gcd_bytes: Vec<u8> = Vec::new();
        let codecs_with_gcd = [codec_type, FastFieldCodecType::Gcd];
        crate::serialize(VecColumn::from(&vals), &mut gcd_bytes, &codecs_with_gcd)?;
        let buffer = OwnedBytes::new(gcd_bytes);

        let column = crate::open::<i64>(buffer.clone())?;
        assert_eq!(column.get_val(0), -4000i64);
        assert_eq!(column.get_val(1), -3000i64);
        assert_eq!(column.get_val(2), -2000i64);
        assert_eq!(column.max_value(), (num_vals as i64 - 5) * 1000);
        assert_eq!(column.min_value(), -4000i64);

        // Can't apply gcd: swap the last value for 1001 and serialize with the
        // plain codec only.
        vals.pop();
        vals.push(1001i64);
        let mut plain_bytes = Vec::new();
        crate::serialize(VecColumn::from(&vals), &mut plain_bytes, &[codec_type])?;
        let buffer_without_gcd = OwnedBytes::new(plain_bytes);
        assert!(buffer_without_gcd.len() > buffer.len());

        Ok(())
    }

    #[test]
    fn test_fastfield_gcd_i64() -> io::Result<()> {
        [
            FastFieldCodecType::Bitpacked,
            FastFieldCodecType::BlockwiseLinear,
            FastFieldCodecType::Linear,
        ]
        .iter()
        .try_for_each(|&codec_type| test_fastfield_gcd_i64_with_codec(codec_type, 5500))
    }

    /// Same scenario as the i64 helper, on unsigned multiples of 1000
    /// starting at 1000.
    fn test_fastfield_gcd_u64_with_codec(
        codec_type: FastFieldCodecType,
        num_vals: usize,
    ) -> io::Result<()> {
        let mut vals: Vec<u64> = (1..=num_vals).map(|step| step as u64 * 1000u64).collect();

        let mut gcd_bytes: Vec<u8> = Vec::new();
        let codecs_with_gcd = [codec_type, FastFieldCodecType::Gcd];
        crate::serialize(VecColumn::from(&vals), &mut gcd_bytes, &codecs_with_gcd)?;
        let buffer = OwnedBytes::new(gcd_bytes);

        let column = crate::open::<u64>(buffer.clone())?;
        assert_eq!(column.get_val(0), 1000u64);
        assert_eq!(column.get_val(1), 2000u64);
        assert_eq!(column.get_val(2), 3000u64);
        assert_eq!(column.max_value(), num_vals as u64 * 1000);
        assert_eq!(column.min_value(), 1000u64);

        // Can't apply gcd: swap the last value for 1001 and serialize with the
        // plain codec only.
        vals.pop();
        vals.push(1001u64);
        let mut plain_bytes = Vec::new();
        crate::serialize(VecColumn::from(&vals), &mut plain_bytes, &[codec_type])?;
        let buffer_without_gcd = OwnedBytes::new(plain_bytes);
        assert!(buffer_without_gcd.len() > buffer.len());
        Ok(())
    }

    #[test]
    fn test_fastfield_gcd_u64() -> io::Result<()> {
        [
            FastFieldCodecType::Bitpacked,
            FastFieldCodecType::BlockwiseLinear,
            FastFieldCodecType::Linear,
        ]
        .iter()
        .try_for_each(|&codec_type| test_fastfield_gcd_u64_with_codec(codec_type, 5500))
    }

    #[test]
    pub fn test_fastfield2() {
        // Round-trip three multiples of 100 and read each one back.
        let column = crate::serialize_and_load(&[100u64, 200u64, 300u64]);
        assert_eq!(column.get_val(0), 100);
        assert_eq!(column.get_val(1), 200);
        assert_eq!(column.get_val(2), 300);
    }

    #[test]
    fn test_compute_gcd() {
        /// Checks `compute_gcd` with the arguments in both orders.
        fn check_both_orders(large: u64, small: u64, expected: u64) {
            let large = NonZeroU64::new(large).unwrap();
            let small = NonZeroU64::new(small).unwrap();
            let expected = NonZeroU64::new(expected).unwrap();
            assert_eq!(compute_gcd(small, large), expected);
            assert_eq!(compute_gcd(large, small), expected);
        }

        let cases: [(u64, u64, u64); 4] = [(1, 4, 1), (2, 4, 2), (10, 25, 5), (25, 25, 25)];
        for &(large, small, expected) in cases.iter() {
            check_both_orders(large, small, expected);
        }
    }

    #[test]
    fn find_gcd_test() {
        let gcd_of = |vals: &[u64]| find_gcd(vals.iter().copied());

        assert_eq!(gcd_of(&[0]), None);
        assert_eq!(gcd_of(&[0, 10]), NonZeroU64::new(10));
        assert_eq!(gcd_of(&[10, 0]), NonZeroU64::new(10));
        assert_eq!(gcd_of(&[]), None);
        assert_eq!(gcd_of(&[15, 30, 5, 10]), NonZeroU64::new(5));
        assert_eq!(gcd_of(&[15, 16, 10]), NonZeroU64::new(1));
        assert_eq!(gcd_of(&[0, 5, 5, 5]), NonZeroU64::new(5));
        assert_eq!(gcd_of(&[0, 0]), None);
    }
}
0 commit comments