3 files changed, 14 insertions(+), 7 deletions(-)

File 1 of 3:

@@ -38,6 +38,7 @@ async def collect(self):
     async def _collect(self):
         """Do not use! It is called on collector's processing automatically"""

+        # TODO: uncomment when Python 3.6 comes to Ubuntu LTS
         # i = 0
         # async for proxy in self.collect():
         #     if i > settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST:
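The commented-out block above is waiting on asynchronous generators, which only arrived with Python 3.6 (PEP 525): `yield` inside an `async def` plus `async for` on the consumer side. A minimal runnable sketch of that pattern, with DemoCollector and MAX_PROXIES invented for illustration (in the project the cap lives in settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST):

import asyncio

# Stand-in for settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST
MAX_PROXIES = 5

class DemoCollector:
    async def collect(self):
        # Async generator: `yield` inside `async def` needs Python 3.6+,
        # which is exactly what the TODO above is waiting for.
        for i in range(100):
            yield '127.0.0.{}:8080'.format(i)

    async def _collect(self):
        # Consume the generator, capping the number of proxies per request.
        proxies = []
        async for proxy in self.collect():
            if len(proxies) >= MAX_PROXIES:
                break
            proxies.append(proxy)
        return proxies

loop = asyncio.get_event_loop()
print(loop.run_until_complete(DemoCollector()._collect()))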
File 2 of 3:

@@ -10,7 +10,8 @@ class PagesCollector(AbstractCollector):
     This collector will care about pages, increment it on each processing
     and will reset it to 0 if there are no proxies on the page or if the proxies
     are the same as those on the previous one. If you don't want such smart
-    behavior, just set dynamic_pages_count to false and set pages_count manually.
+    behavior, just set dynamic_pages_count to false
+    and set pages_count manually.
     """

     def __init__(self):
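For example, a hypothetical subclass that opts out of the smart behavior this docstring describes (PagesCollector, dynamic_pages_count, pages_count and process_page come from the diff; FixedPagesCollector and its page count are made up):

class FixedPagesCollector(PagesCollector):
    def __init__(self):
        super(FixedPagesCollector, self).__init__()
        self.dynamic_pages_count = False  # disable automatic page tracking
        self.pages_count = 10             # the site is known to have 10 pages

    async def process_page(self, page_index):
        # Fetch and parse page number `page_index` here;
        # return the list of proxies found on it.
        return []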
@@ -30,7 +31,7 @@ async def collect(self):
             self.pages_count = self.current_page + 2
             """
             for those APIs which return
-            the last page for nonexistent ones
+            the last page for nonexistent ones
             """
             proxies_set = set(proxies)

@@ -55,11 +56,12 @@ async def process_page(self, page_index):
         """
         return []

-    """set this value or use dynamic pages count"""
     pages_count = 0
+    """set this value or use dynamic pages count"""
     current_page = 0
-    """use dynamic pages count"""
+
     dynamic_pages_count = True
+    """use dynamic pages count"""


     processing_period = 60 * 10
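Moving each string literal to the line after its attribute follows Python's attribute-docstring convention: the interpreter ignores a bare string literal placed right after an assignment, but documentation tools such as Sphinx's autodoc treat it as the docstring of that attribute. The resulting layout, reduced to a sketch:

class PagesCollector(AbstractCollector):
    pages_count = 0
    """set this value or use dynamic pages count"""

    current_page = 0

    dynamic_pages_count = True
    """use dynamic pages count"""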
File 3 of 3:

@@ -12,6 +12,7 @@ class Collector(PagesCollector):

     def __init__(self):
         super(Collector, self).__init__()
+        # TODO: remove?
         self.pages_count = 57

     async def process_page(self, page_index):
@@ -21,14 +22,17 @@ async def process_page(self, page_index):
             'PageIdx': page_index + 1,
             'Uptime': 0
         }
-        res = await async_requests.post('http://www.gatherproxy.com/proxylist/anonymity/?t=Elite', data=form_data)
+        url = 'http://www.gatherproxy.com/proxylist/anonymity/?t=Elite'
+        res = await async_requests.post(url, data=form_data)
         html_res = res.text
         tree = html.fromstring(html_res)
         table_element = \
             tree.xpath(".//table[@id='tblproxy']")[0]
         table_text = etree.tostring(table_element).decode()
-        matches = re.findall(r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
-                             table_text, re.DOTALL)
+        matches = re.findall(
+            r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
+            table_text, re.DOTALL
+        )

         for m in matches:
             ip = m[0]
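The scraped page hides its proxy table behind inline document.write calls, so the collector runs a regex over the serialized table instead of reading cell text: group 1 captures the dotted IP from the first write, group 2 captures the argument of gp.dep() (the obfuscated port) from the second, and re.DOTALL lets the lazy .+? span the markup between the two calls. A standalone check of the pattern from the diff against a fabricated table fragment (the HTML sample is invented for the demo; only the regex is from the commit):

import re

# Fabricated sample of the scraped markup; only the document.write()
# calls matter to the pattern.
table_text = """
<tr>
  <td><script>document.write('93.184.216.34')</script></td>
  <td><script>document.write(gp.dep('3132333435'))</script></td>
</tr>
"""

# Group 1: the IP; group 2: the obfuscated gp.dep() argument.
# re.DOTALL lets the lazy .+? cross the newlines between the two calls.
matches = re.findall(
    r"document\.write\('([0-9.]+)'\).+?document\.write\(gp\.dep\('(.+?)'\)\)",
    table_text, re.DOTALL
)

print(matches)  # [('93.184.216.34', '3132333435')]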