|
| 1 | +use sha1::{Digest, Sha1}; |
| 2 | + |
// Hash partitioning seed; the value is taken from the Postgres source linked below.
// https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/include/catalog/partition.h#L20
const PARTITION_HASH_SEED: u64 = 0x7A5B22367996DCFD;
|
3 | 5 |
|
/// Selects the algorithm used to map a key to a shard.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum ShardingFunction {
    /// Hash compatible with the one Postgres uses for `HASH(column)`
    /// partitioning on a single `BIGINT` column.
    PgBigintHash,
    /// Example hash derived from the SHA-1 digest of the key.
    Sha1,
}
| 11 | + |
4 | 12 | pub struct Sharder {
|
5 | 13 | shards: usize,
|
| 14 | + sharding_function: ShardingFunction, |
6 | 15 | }
|
7 | 16 |
|
8 | 17 | impl Sharder {
|
9 |
| - pub fn new(shards: usize) -> Sharder { |
10 |
| - Sharder { shards: shards } |
| 18 | + pub fn new(shards: usize, sharding_function: ShardingFunction) -> Sharder { |
| 19 | + Sharder { |
| 20 | + shards, |
| 21 | + sharding_function, |
| 22 | + } |
| 23 | + } |
| 24 | + |
| 25 | + pub fn shard(&self, key: i64) -> usize { |
| 26 | + match self.sharding_function { |
| 27 | + ShardingFunction::PgBigintHash => self.pg_bigint_hash(key), |
| 28 | + ShardingFunction::Sha1 => self.sha1(key), |
| 29 | + } |
11 | 30 | }
|
12 | 31 |
|
13 | 32 | /// Hash function used by Postgres to determine which partition
|
14 | 33 | /// to put the row in when using HASH(column) partitioning.
|
15 | 34 | /// Source: https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/common/hashfn.c#L631
|
16 | 35 | /// Supports only 1 bigint at the moment, but we can add more later.
|
17 |
| - pub fn pg_bigint_hash(&self, key: i64) -> usize { |
| 36 | + fn pg_bigint_hash(&self, key: i64) -> usize { |
18 | 37 | let mut lohalf = key as u32;
|
19 | 38 | let hihalf = (key >> 32) as u32;
|
20 | 39 | lohalf ^= if key >= 0 { hihalf } else { !hihalf };
|
21 | 40 | Self::combine(0, Self::pg_u32_hash(lohalf)) as usize % self.shards
|
22 | 41 | }
|
23 | 42 |
|
| 43 | + /// Example of a hashing function based on SHA1. |
| 44 | + fn sha1(&self, key: i64) -> usize { |
| 45 | + let mut hasher = Sha1::new(); |
| 46 | + |
| 47 | + hasher.update(&key.to_string().as_bytes()); |
| 48 | + |
| 49 | + let result = hasher.finalize(); |
| 50 | + |
| 51 | + // Convert the SHA1 hash into hex so we can parse it as a large integer. |
| 52 | + let hex = format!("{:x}", result); |
| 53 | + |
| 54 | + // Parse the last 8 bytes as an integer (8 bytes = bigint). |
| 55 | + let key = i64::from_str_radix(&hex[hex.len() - 8..], 16).unwrap() as usize; |
| 56 | + |
| 57 | + key % self.shards |
| 58 | + } |
| 59 | + |
24 | 60 | #[inline]
|
25 | 61 | fn rot(x: u32, k: u32) -> u32 {
|
26 | 62 | (x << k) | (x >> (32 - k))
|
@@ -109,36 +145,51 @@ mod test {
|
109 | 145 | // confirming that we implemented Postgres BIGINT hashing correctly.
|
#[test]
fn test_pg_bigint_hash() {
    let sharder = Sharder::new(5, ShardingFunction::PgBigintHash);

    // Row `i` lists the keys that are expected to land on shard `i`.
    let keys_by_shard: [[i64; 10]; 5] = [
        [1, 4, 5, 14, 19, 39, 40, 46, 47, 53],
        [2, 3, 11, 17, 21, 23, 30, 49, 51, 54],
        [6, 7, 15, 16, 18, 20, 25, 28, 34, 35],
        [8, 12, 13, 22, 29, 31, 33, 36, 41, 43],
        [9, 10, 24, 26, 27, 32, 37, 38, 42, 45],
    ];

    for (expected_shard, keys) in keys_by_shard.iter().enumerate() {
        for &key in keys {
            assert_eq!(sharder.shard(key), expected_shard);
        }
    }
}
| 180 | + |
#[test]
fn test_sha1_hash() {
    let sharder = Sharder::new(12, ShardingFunction::Sha1);

    // Expected shard for each of the keys 0 through 19, in order.
    let expected: [usize; 20] = [
        4, 7, 8, 3, 6, 0, 0, 10, 3, 11, 1, 7, 4, 4, 11, 2, 5, 0, 8, 3,
    ];

    for (id, want) in (0_i64..).zip(expected.iter()) {
        assert_eq!(sharder.shard(id), *want);
    }
}
|
144 | 195 | }
|
0 commit comments