Skip to content

Commit c40a483

Browse files
committed
some fixes
1 parent 6dac38a commit c40a483

File tree

3 files changed

+14
-7
lines changed

3 files changed

+14
-7
lines changed

collectors/collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ async def collect(self):
3838
async def _collect(self):
3939
"""Do not use! It is called on collector's processing automatically"""
4040

41+
# TODO: uncomment when Python 3.6 comes to Ubuntu LTS
4142
# i = 0
4243
# async for proxy in self.collect():
4344
# if i > settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST:

collectors/pages_collector.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ class PagesCollector(AbstractCollector):
1010
This collector will care about pages, increment it on each processing
1111
and will reset it to 0 if there are no proxies on the page or if proxies
1212
are the same as those on the previous one. If you don't want such smart
13-
behavior, just set dynamic_pages_count to false and set pages_count manually.
13+
behavior, just set dynamic_pages_count to false
14+
and set pages_count manually.
1415
"""
1516

1617
def __init__(self):
@@ -30,7 +31,7 @@ async def collect(self):
3031
self.pages_count = self.current_page + 2
3132
"""
3233
for those APIs which return
33-
the last page for nonexistent ones
34+
the last page for nonexistent ones
3435
"""
3536
proxies_set = set(proxies)
3637

@@ -55,11 +56,12 @@ async def process_page(self, page_index):
5556
"""
5657
return []
5758

58-
"""set this value or use dynamic pages count"""
5959
pages_count = 0
60+
"""set this value or use dynamic pages count"""
6061
current_page = 0
61-
"""use dynamic pages count"""
62+
6263
dynamic_pages_count = True
64+
"""use dynamic pages count"""
6365

6466
processing_period = 60 * 10
6567

collectors/web/com/gatherproxy/collector.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ class Collector(PagesCollector):
1212

1313
def __init__(self):
1414
super(Collector, self).__init__()
15+
# TODO: remove?
1516
self.pages_count = 57
1617

1718
async def process_page(self, page_index):
@@ -21,14 +22,17 @@ async def process_page(self, page_index):
2122
'PageIdx': page_index + 1,
2223
'Uptime': 0
2324
}
24-
res = await async_requests.post('http://www.gatherproxy.com/proxylist/anonymity/?t=Elite', data=form_data)
25+
url = 'http://www.gatherproxy.com/proxylist/anonymity/?t=Elite'
26+
res = await async_requests.post(url, data=form_data)
2527
html_res = res.text
2628
tree = html.fromstring(html_res)
2729
table_element = \
2830
tree.xpath(".//table[@id='tblproxy']")[0]
2931
table_text = etree.tostring(table_element).decode()
30-
matches = re.findall(r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
31-
table_text, re.DOTALL)
32+
matches = re.findall(
33+
r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
34+
table_text, re.DOTALL
35+
)
3236

3337
for m in matches:
3438
ip = m[0]

0 commit comments

Comments
 (0)