Skip to content

Commit 6dac38a

Browse files
committed
little refactoring
1 parent 118a5b8 commit 6dac38a

File tree

10 files changed

+46
-21
lines changed

10 files changed

+46
-21
lines changed

collectors/collector.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ class AbstractCollector:
1212
__collector__ = False
1313
"""Set this variable to True in your collector's implementation"""
1414

15+
def __init__(self):
16+
self.data = {}
17+
self.saved_variables = set()
18+
1519
async def collect(self):
1620
"""
1721
This method should return proxies in any of the following formats:
@@ -62,6 +66,9 @@ async def load_state(self, state):
6266
self.processing_period = state.processing_period
6367
self.last_processing_proxies_count = state.last_processing_proxies_count
6468
self.data = json.loads(state.data) if state.data is not None and state.data else {}
69+
if '_variables' in self.data:
70+
for var_name in self.data['_variables']:
71+
setattr(self, var_name, self.data['_variables'][var_name])
6572

6673
async def save_state(self, state: models.CollectorState):
6774
"""
@@ -71,11 +78,21 @@ async def save_state(self, state: models.CollectorState):
7178
state.last_processing_time = self.last_processing_time
7279
state.processing_period = self.processing_period
7380
state.last_processing_proxies_count = self.last_processing_proxies_count
81+
82+
for var_name in self.saved_variables:
83+
if '_variables' not in self.data:
84+
self.data['_variables'] = {}
85+
86+
self.data['_variables'][var_name] = getattr(self, var_name)
87+
7488
state.data = json.dumps(self.data)
7589

7690
last_processing_time = 0
7791
"""time as a Unix timestamp (seconds since 01.01.1970)"""
7892

93+
last_processing_proxies_count = 0
94+
"""how many proxies we got on the last request; do not change manually"""
95+
7996
processing_period = 60 * 60
8097
"""processing period in seconds"""
8198

@@ -99,8 +116,7 @@ async def save_state(self, state: models.CollectorState):
99116
when some collector has requests time limit
100117
"""
101118

102-
data = {}
103-
# TODO: consider namespacing
119+
data = None
104120
"""
105121
here you can store some information,
106122
it will be written into and read from database
@@ -111,3 +127,9 @@ async def save_state(self, state: models.CollectorState):
111127
Don't use names starting with the underscore
112128
like this one: _last_page
113129
"""
130+
131+
saved_variables = None
132+
"""
133+
Set of variable names which are saved to the database automatically (inside the data dict)
134+
"""
135+

collectors/pages_collector.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,12 @@ class PagesCollector(AbstractCollector):
1313
behavior, just set dynamic_pages_count to false and set pages_count manually.
1414
"""
1515

16-
async def load_state(self, state):
17-
await super(PagesCollector, self).load_state(state)
18-
if "_current_page" in self.data:
19-
self.current_page = self.data["_current_page"]
20-
21-
if "_pages_count" in self.data:
22-
self.pages_count = self.data["_pages_count"]
23-
24-
async def save_state(self, state):
25-
await super(PagesCollector, self).save_state(state)
26-
self.data["_current_page"] = self.current_page
27-
self.data["_pages_count"] = self.pages_count
16+
def __init__(self):
17+
super(PagesCollector, self).__init__()
18+
self.last_proxies_list = []
19+
self.saved_variables.add('current_page')
20+
self.saved_variables.add('pages_count')
21+
self.saved_variables.add('last_proxies_list')
2822

2923
async def collect(self):
3024
proxies = list(
@@ -40,19 +34,17 @@ async def collect(self):
4034
"""
4135
proxies_set = set(proxies)
4236

43-
if "_last_proxies_set" in self.data:
44-
if set(self.data["_last_proxies_set"]) == proxies_set:
45-
self.pages_count = self.current_page + 1
37+
if set(self.last_proxies_list) == proxies_set:
38+
self.pages_count = self.current_page + 1
4639

47-
self.data["_last_proxies_set"] = list(proxies_set)
40+
self.last_proxies_list = list(proxies_set)
4841
else:
4942
self.pages_count = self.current_page + 1
5043

5144
self.current_page += 1
5245
if self.current_page >= self.pages_count:
5346
self.current_page = 0
5447

55-
5648
return proxies
5749

5850
async def process_page(self, page_index):
@@ -70,3 +62,5 @@ async def process_page(self, page_index):
7062
dynamic_pages_count = True
7163

7264
processing_period = 60 * 10
65+
66+
last_proxies_list = None

collectors/web/com/gatherproxy/collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class Collector(PagesCollector):
1111
__collector__ = True
1212

1313
def __init__(self):
14+
super(Collector, self).__init__()
1415
self.pages_count = 57
1516

1617
async def process_page(self, page_index):

collectors/web/com/nordvpn/collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class Collector(PagesCollector):
1616
processing_period = 10 * 60
1717

1818
def __init__(self):
19+
super(Collector, self).__init__()
1920
self.pages_count = 10
2021
self.limit = 100
2122

collectors/web/com/premproxy/collector.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
class BaseCollectorPremProxyCom(PagesCollector):
1111
def __init__(self, url, pages_count):
12+
super(BaseCollectorPremProxyCom, self).__init__()
1213
self.url = url
1314
self.pages_count = pages_count
1415

@@ -57,8 +58,8 @@ def __init__(self):
5758
super(Collector, self).__init__('https://premproxy.com/list/', 20)
5859

5960

60-
class Collector(BaseCollectorPremProxyCom):
61+
class CollectorSocksList(BaseCollectorPremProxyCom):
6162
__collector__ = True
6263

6364
def __init__(self):
64-
super(Collector, self).__init__('https://premproxy.com/socks-list/', 20)
65+
super(CollectorSocksList, self).__init__('https://premproxy.com/socks-list/', 20)

collectors/web/net/checkerproxy/collector.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ class Collector(AbstractCollector):
99
__collector__ = True
1010

1111
def __init__(self):
12+
super(Collector, self).__init__()
1213
self.processing_period = 3600 * 12
1314
self.time_delta = datetime.timedelta(-1)
1415

@@ -27,5 +28,6 @@ class CollectorToday(Collector):
2728
__collector__ = True
2829

2930
def __init__(self):
31+
super(CollectorToday, self).__init__()
3032
self.processing_period = 3600 * 3
3133
self.time_delta = datetime.timedelta(0)

collectors/web/net/free_proxy_list/collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
class BaseCollectorFreeProxyListNet(AbstractCollector):
99
def __init__(self, url):
10+
super(BaseCollectorFreeProxyListNet, self).__init__()
1011
self.url = url
1112

1213
async def collect(self):

collectors/web/net/freeproxylists/collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class Collector(PagesCollector):
2222
__collector__ = False
2323

2424
def __init__(self):
25+
super(Collector, self).__init__()
2526
self.dynamic_pages_count = True
2627

2728
async def process_page(self, page_index):

collectors/web/org/proxy_list/proxy_list_org/collector.py renamed to collectors/web/org/proxy_list/collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class Collector(PagesCollector):
1111
__collector__ = True
1212

1313
def __init__(self):
14+
super(Collector, self).__init__()
1415
self.pages_count = 10
1516

1617
async def process_page(self, page_index):

models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class Meta:
132132
processing_period = peewee.IntegerField(null=False)
133133
last_processing_time = peewee.IntegerField(null=False)
134134
last_processing_proxies_count = peewee.IntegerField(default=0, null=False)
135+
# TODO: add new proxies
135136
last_processing_new_proxies_count = peewee.IntegerField(default=0, null=False)
136137
data = peewee.TextField(default=None, null=True)
137138

0 commit comments

Comments
 (0)