|
# "a.b.c.d:port" -- group 1 is the dotted quad, groups 2-5 the individual
# octets, group 6 the port. Octet ranges are not checked by the regex.
PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
# "[addr]:port" -- group 1 is the bracketed address text, group 2 the port.
PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
# "<16 base32 characters>.onion:port" -- group 1 is the host, group 2 the port.
PATTERN_ONION = re.compile(r"^([abcdefghijklmnopqrstuvwxyz234567]{16}\.onion):(\d+)$")
# Accepted user agents. The pattern is anchored at the start only, so anything
# may follow the version number (e.g. a closing "/" or extra components).
# The dots are escaped so that they match a literal "." rather than any
# character.
PATTERN_AGENT = re.compile(
    r"^/Satoshi:("
    r"0\.14\.(0|1|2|3|99)|"
    r"0\.15\.(0|1|2|99)|"
    r"0\.16\.(0|1|2|3|99)|"
    r"0\.17\.(0|0\.1|1|2|99)|"
    r"0\.18\.(0|1|99)|"
    r"0\.19\.99"
    r")")
34 | 42 |
|
35 | 43 | def parseline(line):
|
36 | 44 | sline = line.split()
|
@@ -99,78 +107,119 @@ def parseline(line):
|
99 | 107 | 'sortkey': sortkey,
|
100 | 108 | }
|
101 | 109 |
|
def dedup(ips):
    '''
    Deduplicate entries by (address, port).

    When several entries share the same pair, the last one seen is the one
    kept; it takes the position at which that pair first appeared.
    '''
    unique = {(entry['ip'], entry['port']): entry for entry in ips}
    return list(unique.values())
| 116 | + |
def filtermultiport(ips):
    '''
    Drop every entry whose 'sortkey' is shared with another entry.

    Entries are grouped by their 'sortkey' value; only groups that contain
    exactly one entry survive, in order of first appearance.
    '''
    by_sortkey = {}
    for entry in ips:
        by_sortkey.setdefault(entry['sortkey'], []).append(entry)

    survivors = []
    for group in by_sortkey.values():
        if len(group) == 1:
            survivors.append(group[0])
    return survivors
|
108 | 123 |
|
def lookup_asn(net, ip):
    '''
    Look up the ASN for an IP (4 or 6) address by querying cymru.com.

    net is 'ipv4' for an IPv4 address; any other value is handled as IPv6.
    ip is the address in textual form.

    Returns the ASN as an int, or None if it could not be found. Any failure
    (malformed address, DNS error, unexpected answer) is reported on stderr
    and turned into None rather than raised.
    '''
    import ipaddress  # only needed for expanding IPv6 addresses below

    try:
        if net == 'ipv4':
            labels = ip.split('.')
            zone = '.origin'
        else:                  # http://www.team-cymru.com/IP-ASN-mapping.html
            # Expand the address before slicing, so that a '::' falling inside
            # the first four groups is not mistaken for an empty group:
            #   2001:4860:b002:23::68 -> 2001:4860:b002:0023:0000:0000:0000:0068
            groups = ipaddress.IPv6Address(ip).exploded.split(':')
            # One DNS label per hex digit of the first four groups:
            #   2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
            labels = list(''.join(groups[:4]))
            zone = '.origin6'

        # The query name is the labels in reverse order under the zone.
        qname = '.'.join(reversed(labels)) + zone + '.asn.cymru.com'
        answer = dns.resolver.query(qname, 'TXT').response.answer
        # Take the first answer entry; the ASN is the first space-separated
        # token inside the quoted TXT payload.
        txt = [rrset.to_text() for rrset in answer][0]
        return int(txt.split('"')[1].split(' ')[0])
    except Exception:
        # Deliberately broad: a failed lookup must only skip this address.
        sys.stderr.write('ERR: Could not resolve ASN for "' + ip + '"\n')
        return None
| 148 | + |
109 | 149 | # Based on Greg Maxwell's seed_filter.py
|
110 |
| -def filterbyasn(ips, max_per_asn, max_total): |
def filterbyasn(ips, max_per_asn, max_per_net):
    '''
    Limit the selection per ASN and per network type.

    IPv4/IPv6 entries are taken in input order. An entry is kept only if its
    network has not yet reached max_per_net entries, its ASN can be looked
    up, and that ASN has not yet reached max_per_asn entries. Onion entries
    are not looked up; the first max_per_net of them are appended at the end.
    Entries of any other network type are dropped.
    '''
    # Sift out entries by network type
    clearnet = []
    onions = []
    for entry in ips:
        if entry['net'] == 'onion':
            onions.append(entry)
        elif entry['net'] in ('ipv4', 'ipv6'):
            clearnet.append(entry)

    per_net = collections.Counter()
    per_asn = collections.Counter()
    selected = []
    for entry in clearnet:
        net = entry['net']
        # Check the per-network cap first so that no lookup is made for a
        # network that is already full.
        if per_net[net] == max_per_net:
            continue
        asn = lookup_asn(net, entry['ip'])
        if asn is None:
            continue
        if per_asn[asn] == max_per_asn:
            continue
        per_asn[asn] += 1
        per_net[net] += 1
        selected.append(entry)

    # Add back onions (up to max_per_net)
    return selected + onions[:max_per_net]
|
148 | 172 |
|
def ip_stats(ips):
    '''
    Summarize how many entries belong to each network type.

    None entries are ignored. Returns a string holding the ipv4, ipv6 and
    onion counts, each right-aligned in a 6-character column.
    '''
    counts = collections.Counter(
        entry['net'] for entry in ips if entry is not None)
    return '%6d %6d %6d' % (counts['ipv4'], counts['ipv6'], counts['onion'])
| 180 | + |
149 | 181 | def main():
|
150 | 182 | lines = sys.stdin.readlines()
|
151 | 183 | ips = [parseline(line) for line in lines]
|
152 | 184 |
|
153 |
| - # Skip entries with valid address. |
| 185 | + print('\x1b[7m IPv4 IPv6 Onion Pass \x1b[0m', file=sys.stderr) |
| 186 | + print('%s Initial' % (ip_stats(ips)), file=sys.stderr) |
| 187 | + # Skip entries with invalid address. |
154 | 188 | ips = [ip for ip in ips if ip is not None]
|
| 189 | + print('%s Skip entries with invalid address' % (ip_stats(ips)), file=sys.stderr) |
| 190 | + # Skip duplicates (in case multiple seeds files were concatenated) |
| 191 | + ips = dedup(ips) |
| 192 | + print('%s After removing duplicates' % (ip_stats(ips)), file=sys.stderr) |
155 | 193 | # Skip entries from suspicious hosts.
|
156 | 194 | ips = [ip for ip in ips if ip['ip'] not in SUSPICIOUS_HOSTS]
|
| 195 | + print('%s Skip entries from suspicious hosts' % (ip_stats(ips)), file=sys.stderr) |
157 | 196 | # Enforce minimal number of blocks.
|
158 | 197 | ips = [ip for ip in ips if ip['blocks'] >= MIN_BLOCKS]
|
| 198 | + print('%s Enforce minimal number of blocks' % (ip_stats(ips)), file=sys.stderr) |
159 | 199 | # Require service bit 1.
|
160 | 200 | ips = [ip for ip in ips if (ip['service'] & 1) == 1]
|
161 |
| - # Require at least 50% 30-day uptime. |
162 |
| - ips = [ip for ip in ips if ip['uptime'] > 50] |
| 201 | + print('%s Require service bit 1' % (ip_stats(ips)), file=sys.stderr) |
| 202 | + # Require at least 50% 30-day uptime for clearnet, 10% for onion. |
| 203 | + req_uptime = { |
| 204 | + 'ipv4': 50, |
| 205 | + 'ipv6': 50, |
| 206 | + 'onion': 10, |
| 207 | + } |
| 208 | + ips = [ip for ip in ips if ip['uptime'] > req_uptime[ip['net']]] |
| 209 | + print('%s Require minimum uptime' % (ip_stats(ips)), file=sys.stderr) |
163 | 210 | # Require a known and recent user agent.
|
164 | 211 | ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])]
|
| 212 | + print('%s Require a known and recent user agent' % (ip_stats(ips)), file=sys.stderr) |
165 | 213 | # Sort by availability (and use last success as tie breaker)
|
166 | 214 | ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True)
|
167 | 215 | # Filter out hosts with multiple bitcoin ports, these are likely abusive
|
168 | 216 | ips = filtermultiport(ips)
|
| 217 | + print('%s Filter out hosts with multiple bitcoin ports' % (ip_stats(ips)), file=sys.stderr) |
169 | 218 | # Look up ASNs and limit results, both per ASN and per network.
|
170 | 219 | ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
|
| 220 | + print('%s Look up ASNs and limit results per ASN and per net' % (ip_stats(ips)), file=sys.stderr) |
171 | 221 | # Sort the results by IP address (for deterministic output).
|
172 | 222 | ips.sort(key=lambda x: (x['net'], x['sortkey']))
|
173 |
| - |
174 | 223 | for ip in ips:
|
175 | 224 | if ip['net'] == 'ipv6':
|
176 | 225 | print('[%s]:%i' % (ip['ip'], ip['port']))
|
|
0 commit comments