Skip to content
This repository was archived by the owner on Nov 6, 2020. It is now read-only.

Commit 485d4aa

Browse files
jimpodebris
authored andcommitted
Fix XOR distance calculation in discovery Kademlia impl (#8589)
* network-devp2p: Test for discovery bucket insertion. All test values are randomly generated and the assertions are checked manually. Test fails because distance metric is implemented incorrectly. * network-devp2p: Fix discovery distance function. The Kademlia distance function (XOR) was implemented incorrectly as a population count. * network-devp2p: Refactor nearest_node_entries to be on instance. Optimizations are possible with more access to the discovery state. * network-devp2p: Fix loss of precision in nearest_node_entries. * network-devp2p: More efficient nearest node search. The discovery algorithm to identify the nearest k nodes does not need to scan all entries in all buckets.
1 parent 799ae29 commit 485d4aa

File tree

1 file changed

+172
-49
lines changed

1 file changed

+172
-49
lines changed

util/network-devp2p/src/discovery.rs

Lines changed: 172 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
use ethcore_bytes::Bytes;
1818
use std::net::SocketAddr;
19-
use std::collections::{HashSet, HashMap, BTreeMap, VecDeque};
19+
use std::collections::{HashSet, HashMap, VecDeque};
2020
use std::mem;
2121
use std::default::Default;
2222
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
@@ -34,9 +34,8 @@ use network::IpFilter;
3434

3535
use PROTOCOL_VERSION;
3636

37-
const ADDRESS_BYTES_SIZE: u32 = 32; // Size of address type in bytes.
38-
const ADDRESS_BITS: u32 = 8 * ADDRESS_BYTES_SIZE; // Denoted by n in [Kademlia].
39-
const NODE_BINS: u32 = ADDRESS_BITS - 1; // Size of m_state (excludes root, which is us).
37+
const ADDRESS_BYTES_SIZE: usize = 32; // Size of address type in bytes.
38+
const ADDRESS_BITS: usize = 8 * ADDRESS_BYTES_SIZE; // Denoted by n in [Kademlia].
4039
const DISCOVERY_MAX_STEPS: u16 = 8; // Max iterations of discovery. (discover)
4140
const BUCKET_SIZE: usize = 16; // Denoted by k in [Kademlia]. Number of nodes stored in each bucket.
4241
const ALPHA: usize = 3; // Denoted by \alpha in [Kademlia]. Number of concurrent FindNode requests.
@@ -119,7 +118,7 @@ impl Discovery {
119118
discovery_round: 0,
120119
discovery_id: NodeId::new(),
121120
discovery_nodes: HashSet::new(),
122-
node_buckets: (0..NODE_BINS).map(|_| NodeBucket::new()).collect(),
121+
node_buckets: (0..ADDRESS_BITS).map(|_| NodeBucket::new()).collect(),
123122
udp_socket: socket,
124123
send_queue: VecDeque::new(),
125124
check_timestamps: true,
@@ -155,8 +154,16 @@ impl Discovery {
155154
fn update_node(&mut self, e: NodeEntry) {
156155
trace!(target: "discovery", "Inserting {:?}", &e);
157156
let id_hash = keccak(e.id);
157+
let dist = match Discovery::distance(&self.id_hash, &id_hash) {
158+
Some(dist) => dist,
159+
None => {
160+
warn!(target: "discovery", "Attempted to update own entry: {:?}", e);
161+
return;
162+
}
163+
};
164+
158165
let ping = {
159-
let bucket = &mut self.node_buckets[Discovery::distance(&self.id_hash, &id_hash) as usize];
166+
let bucket = &mut self.node_buckets[dist];
160167
let updated = if let Some(node) = bucket.nodes.iter_mut().find(|n| n.address.id == e.id) {
161168
node.address = e.clone();
162169
node.timeout = None;
@@ -181,7 +188,15 @@ impl Discovery {
181188

182189
/// Removes the timeout of a given NodeId if it can be found in one of the discovery buckets
183190
fn clear_ping(&mut self, id: &NodeId) {
184-
let bucket = &mut self.node_buckets[Discovery::distance(&self.id_hash, &keccak(id)) as usize];
191+
let dist = match Discovery::distance(&self.id_hash, &keccak(id)) {
192+
Some(dist) => dist,
193+
None => {
194+
warn!(target: "discovery", "Received ping from self");
195+
return
196+
}
197+
};
198+
199+
let bucket = &mut self.node_buckets[dist];
185200
if let Some(node) = bucket.nodes.iter_mut().find(|n| &n.address.id == id) {
186201
node.timeout = None;
187202
}
@@ -212,7 +227,7 @@ impl Discovery {
212227
trace!(target: "discovery", "Starting round {:?}", self.discovery_round);
213228
let mut tried_count = 0;
214229
{
215-
let nearest = Discovery::nearest_node_entries(&self.discovery_id, &self.node_buckets).into_iter();
230+
let nearest = self.nearest_node_entries(&self.discovery_id).into_iter();
216231
let nearest = nearest.filter(|x| !self.discovery_nodes.contains(&x.id)).take(ALPHA).collect::<Vec<_>>();
217232
for r in nearest {
218233
let rlp = encode_list(&(&[self.discovery_id.clone()][..]));
@@ -233,17 +248,17 @@ impl Discovery {
233248
self.discovery_round += 1;
234249
}
235250

236-
fn distance(a: &H256, b: &H256) -> u32 {
237-
let d = *a ^ *b;
238-
let mut ret:u32 = 0;
239-
for i in 0..32 {
240-
let mut v: u8 = d[i];
241-
while v != 0 {
242-
v >>= 1;
243-
ret += 1;
251+
/// The base 2 log of the distance between a and b using the XOR metric.
252+
fn distance(a: &H256, b: &H256) -> Option<usize> {
253+
for i in (0..ADDRESS_BYTES_SIZE).rev() {
254+
let byte_index = ADDRESS_BYTES_SIZE - i - 1;
255+
let d: u8 = a[byte_index] ^ b[byte_index];
256+
if d != 0 {
257+
let high_bit_index = 7 - d.leading_zeros() as usize;
258+
return Some(i * 8 + high_bit_index);
244259
}
245260
}
246-
ret
261+
None // a and b are equal, so log distance is -inf
247262
}
248263

249264
fn ping(&mut self, node: &NodeEndpoint) {
@@ -286,36 +301,53 @@ impl Discovery {
286301
Ok(())
287302
}
288303

289-
fn nearest_node_entries(target: &NodeId, buckets: &[NodeBucket]) -> Vec<NodeEntry> {
290-
let mut found: BTreeMap<u32, Vec<&NodeEntry>> = BTreeMap::new();
291-
let mut count = 0;
304+
fn nearest_node_entries(&self, target: &NodeId) -> Vec<NodeEntry> {
292305
let target_hash = keccak(target);
306+
let target_distance = self.id_hash ^ target_hash;
307+
308+
let mut ret = Vec::<NodeEntry>::with_capacity(BUCKET_SIZE);
309+
310+
// Sort bucket entries by distance to target and append to end of result vector.
311+
let append_bucket = |results: &mut Vec<NodeEntry>, bucket: &NodeBucket| -> bool {
312+
let mut sorted_entries: Vec<&BucketEntry> = bucket.nodes.iter().collect();
313+
sorted_entries.sort_unstable_by_key(|entry| entry.id_hash ^ target_hash);
314+
315+
let remaining_capacity = results.capacity() - results.len();
316+
let to_append = if remaining_capacity < sorted_entries.len() {
317+
&sorted_entries[0..remaining_capacity]
318+
} else {
319+
&sorted_entries
320+
};
321+
for entry in to_append.iter() {
322+
results.push(entry.address.clone());
323+
}
324+
results.len() == results.capacity()
325+
};
293326

294-
// Sort nodes by distance to target
295-
for bucket in buckets {
296-
for node in &bucket.nodes {
297-
let distance = Discovery::distance(&target_hash, &node.id_hash);
298-
found.entry(distance).or_insert_with(Vec::new).push(&node.address);
299-
if count == BUCKET_SIZE {
300-
// delete the most distant element
301-
let remove = {
302-
let (key, last) = found.iter_mut().next_back().expect("Last element is always Some when count > 0");
303-
last.pop();
304-
if last.is_empty() { Some(key.clone()) } else { None }
305-
};
306-
if let Some(remove) = remove {
307-
found.remove(&remove);
308-
}
309-
}
310-
else {
311-
count += 1;
327+
// This algorithm leverages the structure of the routing table to efficiently find the
328+
// nearest entries to a target hash. First, we compute the XOR distance from this node to
329+
// the target. On a first pass, we iterate from the MSB of the distance, stopping at any
330+
// buckets where the distance bit is set, and skipping the buckets where it is unset. These
331+
// must be in order the nearest to the target. On a second pass, we traverse from LSB to
332+
// MSB, appending the buckets skipped on the first pass. The reason this works is that all
333+
// entries in bucket i have a common prefix of length exactly 32 - i - 1 with the ID of this
334+
// node.
335+
336+
for i in 0..ADDRESS_BITS {
337+
if ((target_distance[i / 8] << (i % 8)) & 0x80) != 0 {
338+
let bucket = &self.node_buckets[ADDRESS_BITS - i - 1];
339+
if !bucket.nodes.is_empty() && append_bucket(&mut ret, bucket) {
340+
return ret;
312341
}
313342
}
314343
}
315-
316-
let mut ret:Vec<NodeEntry> = Vec::new();
317-
for nodes in found.values() {
318-
ret.extend(nodes.iter().map(|&n| n.clone()));
344+
for i in (0..ADDRESS_BITS).rev() {
345+
if ((target_distance[i / 8] << (i % 8)) & 0x80) == 0 {
346+
let bucket = &self.node_buckets[ADDRESS_BITS - i - 1];
347+
if !bucket.nodes.is_empty() && append_bucket(&mut ret, bucket) {
348+
return ret;
349+
}
350+
}
319351
}
320352
ret
321353
}
@@ -453,7 +485,7 @@ impl Discovery {
453485
let target: NodeId = rlp.val_at(0)?;
454486
let timestamp: u64 = rlp.val_at(1)?;
455487
self.check_timestamp(timestamp)?;
456-
let nearest = Discovery::nearest_node_entries(&target, &self.node_buckets);
488+
let nearest = self.nearest_node_entries(&target);
457489
if nearest.is_empty() {
458490
return Ok(None);
459491
}
@@ -614,7 +646,7 @@ mod tests {
614646
}
615647
discovery2.round();
616648
}
617-
assert_eq!(Discovery::nearest_node_entries(&NodeId::new(), &discovery2.node_buckets).len(), 3)
649+
assert_eq!(discovery2.nearest_node_entries(&NodeId::new()).len(), 3)
618650
}
619651

620652
#[test]
@@ -625,31 +657,122 @@ mod tests {
625657
for _ in 0..1200 {
626658
discovery.add_node(NodeEntry { id: NodeId::random(), endpoint: ep.clone() });
627659
}
628-
assert!(Discovery::nearest_node_entries(&NodeId::new(), &discovery.node_buckets).len() <= 16);
660+
assert!(discovery.nearest_node_entries(&NodeId::new()).len() <= 16);
629661
let removed = discovery.check_expired(true).len();
630662
assert!(removed > 0);
631663
}
632664

633665
#[test]
634666
fn find_nearest_saturated() {
635667
use super::*;
636-
let mut buckets: Vec<_> = (0..256).map(|_| NodeBucket::new()).collect();
668+
669+
let key = Random.generate().unwrap();
637670
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40447").unwrap(), udp_port: 40447 };
671+
let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default());
672+
638673
for _ in 0..(16 + 10) {
639-
buckets[0].nodes.push_back(BucketEntry {
674+
discovery.node_buckets[0].nodes.push_back(BucketEntry {
640675
address: NodeEntry { id: NodeId::new(), endpoint: ep.clone() },
641676
timeout: None,
642677
id_hash: keccak(NodeId::new()),
643678
});
644679
}
645-
let nearest = Discovery::nearest_node_entries(&NodeId::new(), &buckets);
680+
let nearest = discovery.nearest_node_entries(&NodeId::new());
646681
assert_eq!(nearest.len(), 16)
647682
}
648683

684+
#[test]
685+
fn routing_table_insertions_lookups() {
686+
use super::*;
687+
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40448").unwrap(), udp_port: 40447 };
688+
let node_ids_hex: [&str; 32] = [
689+
"22536fa57acc12c4993295cbc26fef4550513496712b301ad2283d356c8108521244a362e64e6d907a0d0b4e65526699c5ae3cfebfc680505fe3b33d50672835",
690+
"22c482f42401546f8dd7ed6b1c0cad976da6630730f1116614579ccb084791a528ff2676bfe94434de80e5d7e479f1ea1d7737077da3bd5e69a0f3e5bf596091",
691+
"234c73e3a8f6835a7f9a9d2a896bff4908d66d21d5433a2c37d94f1fa9a6ca17d02388f31013ff87e3ad86506e76bd1006b9cac3815974a2b47c8d4f2124697e",
692+
"2a5aaf4e2046c521e890dc82313c6151a55078f045a7e3d259f168238d029271cdd9a0943468d45c1e36a34a8a6d4de4b0262e48d3c8cfdd4c2aab5df42926b9",
693+
"341d8c94d9670461186cfc1f66d4246cb12384940e9f621ec8d6c216b5d037cde5f7a41b70474ca36ced4a4f2fe91c9dc5a24a128414672661f78e8611d54bfd",
694+
"3d9fd01851f3ae1bfd06b48e89738f29f9a2b4dce3ab7864df4fccca55d1ac88044956ba47d0c4cb44a19924626a3a3aa5a4de8958365cb7385111ce7b929200",
695+
"406d5507a7fbc194a495800ae8cf408093336febc24d03d6c63756f522274ab02146ceb1b0213291a9a1544680503837519f88f1e8677d921de62c82935b4e6c",
696+
"4c537f00805f320616ee49c7bc36e1d7e52a04a782b0cc00fd3d6b77200b027cef5f875ed38f1167fef4b02d7bd49a661812301d9d680bb62297131204c035f9",
697+
"4fc8e3fdbdd7acad82b283ac52c121b805f3b15ffcaa6b2ca67b9e375aa88e978951ffa3d03ee13be99f0ee987db0bbfc6a7ca02b175e9123d79826025b4089d",
698+
"55b5042a6910bc908a0520966e8cbcc92ac299bdb7efbfbcf703df1506fa0f9b09c5eeb930080de848d2864cca71f885942852c51233db0ee46fe0447306d61f",
699+
"5d24f28b350c4c37fc4dad7f418e029992c9e4ac356bb3d9a1356ba1076339863c05044d7ceba233c65779401f8a3b38fe67b6a592c1be4834dc869f7bb932eb",
700+
"5f6edaf2f2ae3003f4b4ff90b8e71a717c832c71a634d96e77fe046f9a88adc8de5718ff3c47659aea4cead5376df5b731e1b6530e6b0999f56ad75d4dabd3f6",
701+
"6214c04211efe91abd23d65e2dc8e711b06d4fb13dcfd65b691dc51f58455b2145f9b38f523b72a45a12705a28d389308a34455720d774c9b805326df42b5a63",
702+
"69df92573ddbbce88b72a930843dbb70728b2a020e0cc4e8ba805dcf7f19297bfc5def4ca447e9e6ec66971be1815b8f49042720431f698b6a87a185d94fa6c8",
703+
"72ffc23de007cf8b6f4a117f7427b532d05861c314344ffa265175f57ee45dae041a710a4dc74124dba1dabdc0f52dfd21e3154d1d4285aab529810c6161d623",
704+
"80b567f279a9512f3a66ebd8f87a93acd4d50bf66f5eff6d04039c1f5838e37021e981539659b33e0644b243fc9671209a80cbef40d1bcf7c7117d353cb45532",
705+
"9009dc9e3bf50595f84271f46d4c7a5ad6971f7d2ffce1905bfc40a407d34fc5e2dcebd92746eadcd2c5fa4d5aaccb0e01b542d506b361851df3f19e6bc629a3",
706+
"95264f56e091efeba911003fd01eeb2c81f6fc4bb7b10c92e4c7bfaf460b7246d232e61ad8a223d74870981a84e15b2d5134c25d931cb860c6912b20a2d3ac01",
707+
"96013a472a9f7ff9c5c76b5ca958f14ee510d826703aa41d4c88eac51d30d14229b9f19f6e0469c37aaa6d2136a978a4aaa38ca766f48e53e569f84e44252962",
708+
"a513c988cf8480ad2992caa64e3fa059ce07efda260dfeefed78e1d41ea3f97844603b8a9737eb633086fd9ac2f201200cb656cda8a91bf6cc500d6039db6f53",
709+
"ab3311f38e3641c8b3b1fd36dd7f94b148166e267258e840d29d1859537c74f202bd3342359b3623f96c23fa662d1b65182a898bf20343744b37cb265182e500",
710+
"ac8f41dbd637891a08c9cf715c23577bdd431ba40231682a5a9ba7fd6cb6d66c04f63d6d65c7d9f8737e641e05fdbeede57138a174f0d55e7835575dd6cddd98",
711+
"accdad251888d53e4e18efee1e0d749d050216b14896efb657e9c7b1b78dab82a5b6fb3234017aa19a2f50475d73960f352d308b2e0e841cbebaf418362a4f21",
712+
"b138622208f74d2b8e8fc10bcd4cf3302685cd77d339280a939474b92be8b93e441c50709e25c82cc88a2a4207e9f2938912d60600226efe322b43c6ef5e7aef",
713+
"b4f64e1fa6a5cd6198b2515bde63fbdabaf7e7a31dbaf5369babbda4b8cd0bf5025ac4b7d2d6e6e3bc76c890df585d28d4815e464c8792ef677df9206864a12b",
714+
"c1136e08a27c93812ae2dd47201d9e81c82d1995001b88dba9eec700e1d3385dfaf7ae834226c3c90a138f1808cd10b5502f49ee774a2bc707f34bd7d160b7bd",
715+
"c203ae9b5d1953b0ac462e66338800ec26982e2af54bd444fc8978973191633d4f483e31b28233c07bb99f34d57c680fa5f8e093e64f13b235005b7ab6e2d594",
716+
"c2e1067c58a9948e773e0a3637d946e26d95762f89ec9d35e2ad84f770309d94168d4e112c78d62b60efc6216bc5d31475f24307b1b8e0fa8dcbb18a10cb85f5",
717+
"d60ecb1a89e0d5aeff14c9a95da9f5492eb15871c53563b86b7c5ddf0da74b4c29e682fdd22aae2290e0b16ef4b6d707ef55396ca98f755c95b689cf65ce5f80",
718+
"df5ad4ea6242929df86f2162d1cc62b0e0a6f0a03428a39dea98f6a689335b5ceaf1f0696c17b717b141aeb45a29108d95c3a7d2d1d0bb3441219504ae672917",
719+
"e1268f5dd9552a11989df9d4953bb388e7466711b2bd9882a3ed4d0767a21f046c53c20f9a18d66bae1d6a5544492857ddecb0b5b4818bd4557be252ddd66c71",
720+
"e626019dc0b50b9e254461f19d29e69a4669c5256134a6352c6c30d3bc55d201a5b43fc2e006556cfaf29765b683e807e03093798942826244e4ee9e47c75d3f",
721+
];
722+
let node_entries = node_ids_hex.iter()
723+
.map(|node_id_hex| NodeId::from_str(node_id_hex).unwrap())
724+
.map(|node_id| NodeEntry { id: node_id, endpoint: ep.clone() })
725+
.collect::<Vec<_>>();
726+
727+
let secret_hex = "6c71d1b8930d29e6371be1081f2c909c64b46440a1716314c3c9df995cb3aed1";
728+
let key = Secret::from_str(secret_hex)
729+
.and_then(|secret| KeyPair::from_secret(secret))
730+
.unwrap();
731+
let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default());
732+
733+
node_entries.iter().for_each(|entry| discovery.update_node(entry.clone()));
734+
735+
let expected_bucket_sizes = vec![
736+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
737+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
738+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
739+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
740+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
741+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
742+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
743+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
744+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
745+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
746+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
747+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
748+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
749+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
750+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
751+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7, 8, 12
752+
];
753+
let actual_bucket_sizes = discovery.node_buckets.iter()
754+
.map(|ref bucket| bucket.nodes.len())
755+
.collect::<Vec<_>>();
756+
assert_eq!(actual_bucket_sizes, expected_bucket_sizes);
757+
758+
for entry in &node_entries {
759+
let nearest = discovery.nearest_node_entries(&entry.id);
760+
assert_eq!(nearest.len(), 16);
761+
assert_eq!(nearest[0].id, entry.id);
762+
763+
let mut expected_ids: Vec<NodeId> = node_entries.iter().map(|entry| entry.id).collect();
764+
expected_ids.sort_unstable_by_key(|id| keccak(id) ^ keccak(entry.id));
765+
expected_ids.resize(BUCKET_SIZE, NodeId::default());
766+
767+
let actual_ids: Vec<NodeId> = nearest.iter().map(|entry| entry.id).collect();
768+
assert_eq!(actual_ids, expected_ids);
769+
}
770+
}
771+
649772
#[test]
650773
fn packets() {
651774
let key = Random.generate().unwrap();
652-
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40447").unwrap(), udp_port: 40447 };
775+
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40449").unwrap(), udp_port: 40449 };
653776
let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default());
654777
discovery.check_timestamps = false;
655778
let from = SocketAddr::from_str("99.99.99.99:40445").unwrap();

0 commit comments

Comments
 (0)