3 files changed, 14 insertions(+), 7 deletions(-)

File 1 of 3:

@@ -38,6 +38,7 @@ async def collect(self):
     async def _collect(self):
         """Do not use! It is called on collector's processing automatically"""

+        # TODO: uncomment when Python 3.6 comes to Ubuntu LTS
         # i = 0
         # async for proxy in self.collect():
         #     if i > settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST:
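The commented-out block above is waiting on asynchronous generators, which only arrived with Python 3.6 (PEP 525): `yield` inside an `async def` plus `async for` on the consumer side. A minimal runnable sketch of that pattern, with DemoCollector and MAX_PROXIES invented for illustration (in the project the cap lives in settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST):

import asyncio

# Stand-in for settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST
MAX_PROXIES = 5

class DemoCollector:
    async def collect(self):
        # Async generator: `yield` inside `async def` needs Python 3.6+,
        # which is exactly what the TODO above is waiting for.
        for i in range(100):
            yield '127.0.0.{}:8080'.format(i)

    async def _collect(self):
        # Consume the generator, capping the number of proxies per request.
        proxies = []
        async for proxy in self.collect():
            if len(proxies) >= MAX_PROXIES:
                break
            proxies.append(proxy)
        return proxies

loop = asyncio.get_event_loop()
print(loop.run_until_complete(DemoCollector()._collect()))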
File 2 of 3:

@@ -10,7 +10,8 @@ class PagesCollector(AbstractCollector):
     This collector will care about pages, increment it on each processing
     and will reset it to 0 if there are no proxies on the page or if the proxies
     are the same as those on the previous one. If you don't want such smart
-    behavior, just set dynamic_pages_count to false and set pages_count manually.
+    behavior, just set dynamic_pages_count to false
+    and set pages_count manually.
     """

     def __init__(self):
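For example, a hypothetical subclass that opts out of the smart behavior this docstring describes (PagesCollector, dynamic_pages_count, pages_count and process_page come from the diff; FixedPagesCollector and its page count are made up):

class FixedPagesCollector(PagesCollector):
    def __init__(self):
        super(FixedPagesCollector, self).__init__()
        self.dynamic_pages_count = False  # disable automatic page tracking
        self.pages_count = 10             # the site is known to have 10 pages

    async def process_page(self, page_index):
        # Fetch and parse page number `page_index` here;
        # return the list of proxies found on it.
        return []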
@@ -30,7 +31,7 @@ async def collect(self):
             self.pages_count = self.current_page + 2
             """
             for those APIs which return
-            the last page for nonexistent ones
+            the last page for nonexistent ones
             """
             proxies_set = set(proxies)

@@ -55,11 +56,12 @@ async def process_page(self, page_index):
         """
         return []

-    """set this value or use dynamic pages count"""
     pages_count = 0
+    """set this value or use dynamic pages count"""
     current_page = 0
-    """use dynamic pages count"""
+
     dynamic_pages_count = True
+    """use dynamic pages count"""


     processing_period = 60 * 10
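Moving each string literal to the line after its attribute follows Python's attribute-docstring convention: the interpreter ignores a bare string literal placed right after an assignment, but documentation tools such as Sphinx's autodoc treat it as the docstring of that attribute. The resulting layout, reduced to a sketch:

class PagesCollector(AbstractCollector):
    pages_count = 0
    """set this value or use dynamic pages count"""

    current_page = 0

    dynamic_pages_count = True
    """use dynamic pages count"""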
File 3 of 3:

@@ -12,6 +12,7 @@ class Collector(PagesCollector):

     def __init__(self):
         super(Collector, self).__init__()
+        # TODO: remove?
         self.pages_count = 57

     async def process_page(self, page_index):
@@ -21,14 +22,17 @@ async def process_page(self, page_index):
             'PageIdx': page_index + 1,
             'Uptime': 0
         }
-        res = await async_requests.post('http://www.gatherproxy.com/proxylist/anonymity/?t=Elite', data=form_data)
+        url = 'http://www.gatherproxy.com/proxylist/anonymity/?t=Elite'
+        res = await async_requests.post(url, data=form_data)
         html_res = res.text
         tree = html.fromstring(html_res)
         table_element = \
             tree.xpath(".//table[@id='tblproxy']")[0]
         table_text = etree.tostring(table_element).decode()
-        matches = re.findall(r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
-                             table_text, re.DOTALL)
+        matches = re.findall(
+            r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
+            table_text, re.DOTALL
+        )

         for m in matches:
             ip = m[0]
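The scraped page hides its proxy table behind inline document.write calls, so the collector runs a regex over the serialized table instead of reading cell text: group 1 captures the dotted IP from the first write, group 2 captures the argument of gp.dep() (the obfuscated port) from the second, and re.DOTALL lets the lazy .+? span the markup between the two calls. A standalone check of the pattern from the diff against a fabricated table fragment (the HTML sample is invented for the demo; only the regex is from the commit):

import re

# Fabricated sample of the scraped markup; only the document.write()
# calls matter to the pattern.
table_text = """
<tr>
  <td><script>document.write('93.184.216.34')</script></td>
  <td><script>document.write(gp.dep('3132333435'))</script></td>
</tr>
"""

# Group 1: the IP; group 2: the obfuscated gp.dep() argument.
# re.DOTALL lets the lazy .+? cross the newlines between the two calls.
matches = re.findall(
    r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
    table_text, re.DOTALL
)

print(matches)  # [('93.184.216.34', '3132333435')]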