Skip to content

Commit 9e6f7d5

Browse files
committed
refactor checkers, new fields (ip, city, country_code, region)
1 parent dc1d9c3 commit 9e6f7d5

File tree

7 files changed

+219
-67
lines changed

7 files changed

+219
-67
lines changed

checkers/base_checker.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import ssl
2+
3+
import aiosocks
4+
import asyncio
5+
6+
import async_requests
7+
8+
from aiosocks.connector import ProxyConnector, ProxyClientRequest
9+
import aiohttp
10+
11+
12+
class CheckerResult:
    """Additional proxy information reported by a checker server
    (white IP address, geolocation, organization and so on)."""
    # TODO: change to properties with validation
    ipv4 = None
    ipv6 = None
    city = None
    region = None
    country_code = None
    # (latitude, longitude) tuple
    location_coordinates = None
    organization_name = None

    # every field update_from_other() is allowed to copy
    _FIELDS = (
        'ipv4', 'ipv6', 'city', 'region', 'country_code',
        'location_coordinates', 'organization_name',
    )

    def update_from_other(self, other):
        """Copy every field that is not None in `other` into this result.

        Fields that are None in `other` are left untouched here, so results
        from several checkers can be merged without losing information.
        """
        for attribute_name in self._FIELDS:
            value = getattr(other, attribute_name)
            if value is not None:
                setattr(self, attribute_name, value)
36+
37+
38+
class BaseChecker:
    """Base class for proxy checkers.

    A checker sends a request to `url` through the proxy being tested and
    lets the subclass (via `_check`) decide whether the response proves the
    proxy is working.
    """

    def __init__(self, url=None, request_type="GET", timeout=10):
        self.request_type = request_type
        self.timeout = timeout
        self.url = url

    async def check(self, proxy_address: str, timeout: int = None) -> tuple:
        """
        Checks proxy and returns additional information if such was provided by checker server

        :param proxy_address: string representing proxy ("http://user@qwerty@127.0.0.1:8080")
        :param timeout: overrides timeout if not None
        :return: tuple where first item is bool indication whether proxy is working or not
        and second one is additional information structure with information like white ip address, country and so on
        :raises ValueError: if the subclass did not provide a checking url
        """
        if self.url is None:
            raise ValueError("checker url is not set")

        checker_result = CheckerResult()

        headers = {
            'User-Agent': async_requests.get_random_user_agent()
        }

        # resolve the target hostname on the proxy side, not locally
        conn = ProxyConnector(remote_resolve=True)

        timeout = timeout if timeout is not None else self.timeout

        async with aiohttp.ClientSession(connector=conn, request_class=ProxyClientRequest) as session:
            try:
                async with session.request(
                        self.request_type, self.url, proxy=proxy_address, timeout=timeout,
                        headers=headers) as response:
                    is_working = await self._check(response, checker_result)
            except (aiohttp.client_exceptions.ServerDisconnectedError,
                    aiohttp.client_exceptions.ClientHttpProxyError,
                    aiohttp.client_exceptions.ClientProxyConnectionError,
                    aiohttp.client_exceptions.ClientResponseError,
                    aiosocks.errors.SocksError,
                    aiosocks.SocksError,
                    asyncio.TimeoutError,
                    aiohttp.client_exceptions.ClientOSError,
                    ssl.CertificateError,
                    ) as ex:
                # any of these just means the proxy doesn't work
                is_working = False
                message = str(ex)
                if "file" in message:
                    # TODO: check for "Too many open files"
                    print(message)

        return is_working, checker_result

    async def _check(self, response: aiohttp.ClientResponse, checker_result: CheckerResult) -> bool:
        """
        Implement this method. It will get response from url with http method you provided in constructor

        :param response: aiohttp response
        :param checker_result: fill this structure with information like ip address
        :return: whether proxy is working or not
        """
        # NOTE: was `raise NotImplemented()` — NotImplemented is a constant,
        # not an exception type, and is not callable
        raise NotImplementedError

checkers/d3d_info_checker.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import aiohttp
2+
3+
from checkers.base_checker import BaseChecker, CheckerResult
4+
5+
6+
class D3DInfoChecker(BaseChecker):
    """Checks a proxy by fetching a page that is expected to answer exactly "OK"."""

    def __init__(self, timeout=10):
        # Python-3 zero-argument super() instead of super(D3DInfoChecker, self)
        super().__init__("https://pikagraphs.d3d.info/OK/", timeout=timeout)

    async def _check(self, response: aiohttp.ClientResponse, checker_result: CheckerResult):
        # proxy works only if the body came through unmodified
        return (await response.text()) == 'OK'

checkers/ipinfo_io_checker.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import traceback
2+
3+
import aiohttp
4+
5+
from checkers.base_checker import BaseChecker, CheckerResult
6+
7+
8+
class IPInfoIOChecker(BaseChecker):
    """Checks a proxy against ipinfo.io and extracts white IP and geo data."""

    def __init__(self, timeout=10):
        super().__init__("https://ipinfo.io/json", timeout=timeout)

    async def _check(self, response: aiohttp.ClientResponse, checker_result: CheckerResult):
        """Fill `checker_result` from the ipinfo.io JSON payload.

        :return: True if the payload parsed cleanly, False otherwise
        """
        if response.status != 200:
            return False

        try:
            json_result = await response.json()
            checker_result.ipv4 = json_result['ip']
            checker_result.city = json_result['city']
            checker_result.region = json_result['region']
            checker_result.country_code = json_result['country']
            checker_result.location_coordinates = tuple(float(x) for x in json_result['loc'].split(','))
            checker_result.organization_name = json_result['org']
            return True
        except Exception:
            # Malformed or unexpected payload: log it and report the proxy as
            # not working instead of crashing the checking task. (The previous
            # version re-raised here, which made its failure path unreachable.)
            traceback.print_exc()

        return False

models.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ class Proxy(Base):
3535
bad_uptime = Column(Integer, nullable=True, default=None)
# in microseconds
response_time = Column(Integer, nullable=True, default=None)
# "white" (external) addresses seen by checker servers; exposed via the
# white_ipv4/white_ipv6 properties below
_white_ipv4 = Column(String(16), nullable=True)
# textual IPv6 needs up to 45 chars (incl. IPv4-mapped form); String(16) would truncate
_white_ipv6 = Column(String(45), nullable=True)
city = Column(String(), nullable=True)
region = Column(String(), nullable=True)
country_code = Column(String(3), nullable=True)
3843

3944
def get_raw_protocol(self):
4045
return self.raw_protocol
@@ -55,6 +60,22 @@ def protocol(self, protocol):
5560
def bad_proxy(self):
5661
return self.number_of_bad_checks > 0
5762

63+
@property
def white_ipv4(self):
    # external IPv4 address reported by a checker; backed by the _white_ipv4 column
    return self._white_ipv4

@white_ipv4.setter
def white_ipv4(self, value):
    # no validation yet — value is stored as-is
    self._white_ipv4 = value

@property
def white_ipv6(self):
    # external IPv6 address reported by a checker; backed by the _white_ipv6 column
    return self._white_ipv6

@white_ipv6.setter
def white_ipv6(self, value):
    # no validation yet — value is stored as-is
    self._white_ipv6 = value
78+
5879
def to_url(self, protocol=None):
5980
address = protocol if protocol is not None else self.PROTOCOLS[int(self.raw_protocol)]
6081
address += "://"

processor.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import collectors_list
44
import proxy_utils
5+
from checkers.base_checker import CheckerResult
56

67
from proxy_py import settings
78
from models import Proxy
@@ -215,7 +216,9 @@ async def process_proxy(self, raw_protocol: int, auth_data: str, domain: str, po
215216

216217
try:
217218
start_checking_time = time.time()
218-
check_result = await proxy_utils.check_proxy(proxy_url, aiohttp_proxy_check_session)
219+
check_result, checker_additional_info = await proxy_utils.check_proxy(
220+
proxy_url, aiohttp_proxy_check_session)
221+
219222
end_checking_time = time.time()
220223

221224
if check_result:
@@ -226,7 +229,8 @@ async def process_proxy(self, raw_protocol: int, auth_data: str, domain: str, po
226229
domain,
227230
port,
228231
start_checking_time,
229-
end_checking_time
232+
end_checking_time,
233+
checker_additional_info
230234
)
231235
else:
232236
self.logger.debug("proxy {0} doesn't work".format(proxy_url))
@@ -260,7 +264,7 @@ async def process_proxy(self, raw_protocol: int, auth_data: str, domain: str, po
260264

261265
@staticmethod
262266
def create_or_update_proxy(raw_protocol: Proxy.PROTOCOLS, auth_data, domain, port,
263-
start_checking_time, end_checking_time):
267+
start_checking_time, end_checking_time, additional_info: CheckerResult):
264268
if raw_protocol is None or domain is None or port is None or auth_data is None or start_checking_time is None\
265269
or end_checking_time is None:
266270
raise Exception("Bad arguments")
@@ -297,6 +301,18 @@ def create_or_update_proxy(raw_protocol: Proxy.PROTOCOLS, auth_data, domain, por
297301
proxy.number_of_bad_checks = 0
298302
proxy.last_check_time = int(time.time())
299303

304+
if additional_info.ipv4 is not None:
305+
proxy.white_ipv4 = additional_info.ipv4
306+
307+
if additional_info.city is not None:
308+
proxy.city = additional_info.city
309+
310+
if additional_info.region is not None:
311+
proxy.region = additional_info.region
312+
313+
if additional_info.country_code is not None:
314+
proxy.country_code = additional_info.country_code.strip().lower()
315+
300316
checking_time = int(end_checking_time - start_checking_time)
301317
if checking_time > settings.PROXY_CHECKING_TIMEOUT:
302318
checking_time = settings.PROXY_CHECKING_TIMEOUT

proxy_py/_settings.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from checkers.d3d_info_checker import D3DInfoChecker
2+
from checkers.ipinfo_io_checker import IPInfoIOChecker
3+
14
import os
25

36
DATABASE_CONNECTION_ARGS = (
@@ -20,7 +23,7 @@
2023
BAD_PROXY_CHECKING_PERIOD = 2 * 60 * 60
2124
DEAD_PROXY_CHECKING_PERIOD = 24 * 60 * 60
2225
DEAD_PROXY_THRESHOLD = 6
23-
REMOVE_ON_N_BAD_CHECKS = DEAD_PROXY_THRESHOLD + 21
26+
REMOVE_ON_N_BAD_CHECKS = DEAD_PROXY_THRESHOLD + 64
2427
PROXY_CHECKING_TIMEOUT = 20
2528

2629
PROXY_PROVIDER_SERVER_ADDRESS = {
@@ -29,11 +32,11 @@
2932
}
3033

3134
_PROXY_PROVIDER_SERVER_API_CONFIG_FETCH_CONFIG = {
32-
'fields': ['address', 'protocol', 'auth_data', 'domain', 'port', 'last_check_time',
33-
'number_of_bad_checks', 'bad_proxy', 'uptime', 'response_time'],
35+
'fields': ['address', 'protocol', 'auth_data', 'domain', 'port', 'last_check_time', 'number_of_bad_checks',
36+
'bad_proxy', 'uptime', 'response_time', 'white_ipv4', 'white_ipv6', 'city', 'country_code', 'region'],
3437
'filter_fields': ['last_check_time', 'protocol', 'number_of_bad_checks', 'bad_proxy', 'uptime',
3538
'response_time'],
36-
'order_by_fields': ['last_check_time', 'number_of_bad_checks', 'uptime', 'response_time'],
39+
'order_by_fields': ['last_check_time', 'number_of_bad_checks', 'uptime', 'response_time', 'country_code'],
3740
'default_order_by_fields': ['response_time', ],
3841
}
3942

@@ -46,3 +49,12 @@
4649
}
4750
}
4851
}
52+
53+
# can be either "or" or "and". "Or" means to consider proxy as working if at least one checker checked it OK,
# if "and" proxy is considered as working only when all checkers said that proxy is good
PROXY_CHECKING_CONDITION = 'or'

# checkers are tried in this order by proxy_utils.check_proxy()
PROXY_CHECKERS = [
    IPInfoIOChecker(PROXY_CHECKING_TIMEOUT),
    D3DInfoChecker(PROXY_CHECKING_TIMEOUT),
]

proxy_utils.py

Lines changed: 24 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,26 @@
1-
import traceback
1+
from checkers.base_checker import CheckerResult
2+
from proxy_py import settings
23

3-
import sys
44

5-
from proxy_py import settings
6-
import async_requests
7-
8-
import asyncio
9-
import ssl
10-
import aiohttp
11-
import aiosocks
12-
import os
13-
14-
from aiosocks.connector import ProxyConnector, ProxyClientRequest
15-
16-
17-
# TODO: add multiple checks with several sites
18-
async def check_proxy(proxy_url: str, session=None, timeout=None):
19-
try:
20-
res = await _request(
21-
'get',
22-
'https://pikagraphs.d3d.info/OK/',
23-
# 'https://wtfismyip.com/text',
24-
proxy_url,
25-
settings.PROXY_CHECKING_TIMEOUT if timeout is None else timeout,
26-
session
27-
)
28-
29-
if res.status == 200 and res.text == "OK":
30-
return True
31-
except (aiohttp.client_exceptions.ServerDisconnectedError,
32-
aiohttp.client_exceptions.ClientHttpProxyError,
33-
aiohttp.client_exceptions.ClientProxyConnectionError,
34-
aiohttp.client_exceptions.ClientResponseError,
35-
aiosocks.errors.SocksError,
36-
aiosocks.SocksError,
37-
asyncio.TimeoutError,
38-
aiohttp.client_exceptions.ClientOSError,
39-
ssl.CertificateError,
40-
) as ex:
41-
message = str(ex)
42-
if "file" in message:
43-
print(message)
44-
# TODO: check for "Too many open files"
45-
return False
46-
47-
return False
48-
49-
50-
async def _request(method, url, proxy_url, timeout, session: aiohttp.ClientSession=None):
51-
headers = {
52-
'User-Agent': async_requests.get_random_user_agent()
53-
}
54-
55-
conn = ProxyConnector(remote_resolve=True)
56-
57-
with aiohttp.ClientSession(connector=conn, request_class=ProxyClientRequest) as session:
58-
async with session.request(method, url, proxy=proxy_url, timeout=timeout) as response:
59-
# async with session.request(method, url, timeout=timeout, headers=headers, proxy=proxy_url) as response:
60-
status = response.status
61-
text = await response.text()
62-
return async_requests.Response(status, text)
5+
async def check_proxy(proxy_url: str, timeout=None) -> tuple:
    """
    Run the proxy through every checker in settings.PROXY_CHECKERS.

    With PROXY_CHECKING_CONDITION == 'or' the first successful checker wins;
    with 'and' every checker has to succeed and their additional info is merged.

    :param proxy_url: proxy address ("protocol://auth@host:port")
    :param timeout: per-checker timeout override in seconds, or None to keep each checker's own
    :return: tuple (is_working, CheckerResult with additional info)
    """
    results = []

    for checker in settings.PROXY_CHECKERS:
        is_working, info = await checker.check(proxy_url, timeout=timeout)

        # "or": one good checker is enough — return its info right away
        if is_working and settings.PROXY_CHECKING_CONDITION == 'or':
            return is_working, info

        results.append((is_working, info))

    additional_information = CheckerResult()

    if settings.PROXY_CHECKING_CONDITION == 'and':
        for is_working, info in results:
            if not is_working:
                return False, additional_information

            # merge geo/ip info from every successful checker
            additional_information.update_from_other(info)

        return True, additional_information

    return False, additional_information

0 commit comments

Comments
 (0)