5
5
import models
6
6
7
7
8
+ # TODO: refactor saving state
8
9
class AbstractCollector :
9
10
"""Base class for all types of collectors"""
10
11
11
12
async def collect (self ):
12
13
"""
13
- this method should return proxies in any of the following formats:
14
+ This method should return proxies in any of the following formats:
14
15
15
16
::
16
17
@@ -19,22 +20,54 @@ async def collect(self):
19
20
protocol://ip:port
20
21
protocol://domain:port
21
22
23
+
24
+ ip can be both ipv4 and ipv6
25
+
26
+ yield will be supported in the future; for now, just return a list
22
27
"""
23
28
24
29
return []
25
30
26
31
async def _collect(self):
    """
    Do not use! It is called on collector's processing automatically.

    Awaits ``collect()``, keeps at most
    ``settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST`` of the
    returned proxies and stores the number of kept proxies in
    ``last_processing_proxies_count``.

    :return: list with the kept proxies
    """
    # NOTE: collect() returns a whole collection for now; once it is able
    # to yield, this truncation should become an early break of the loop.
    collected = list(await self.collect())
    limit = settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST
    limited = collected[:limit]
    self.last_processing_proxies_count = len(limited)
    return limited
29
45
30
46
async def load_state(self, state: models.CollectorState):
    """
    Function for loading collector's state from database model.
    It's called automatically, don't worry. All you can do is
    to override it without forgetting to await parent's method like this:

    ::

        async def load_state(self, state):
            await super(MyCollector, self).load_state(state)
            # do something here

    :param state: model whose ``last_processing_time``,
        ``processing_period``, ``last_processing_proxies_count`` and
        JSON-encoded ``data`` are copied to this collector
    """
    self.last_processing_time = state.last_processing_time
    self.processing_period = state.processing_period
    self.last_processing_proxies_count = state.last_processing_proxies_count
    # both None and an empty string mean that nothing was stored yet
    self.data = json.loads(state.data) if state.data else {}
34
62
35
- async def set_state (self , state : models .CollectorState ):
63
async def save_state(self, state: models.CollectorState):
    """
    Function for saving collector's state to database model.
    It's called automatically, don't worry.

    :param state: model that receives ``last_processing_time``,
        ``processing_period``, ``last_processing_proxies_count`` and
        the JSON-encoded ``data`` of this collector
    """
    # these fields have the same names on the collector and on the model
    mirrored_fields = (
        "last_processing_time",
        "processing_period",
        "last_processing_proxies_count",
    )
    for field_name in mirrored_fields:
        setattr(state, field_name, getattr(self, field_name))

    # data is written to the model as a JSON string
    state.data = json.dumps(self.data)
39
72
40
73
last_processing_time = 0
@@ -53,16 +86,25 @@ async def set_state(self, state: models.CollectorState):
53
86
override_maximum_processing_period = None
54
87
"""
55
88
ignore settings' maximum processing period and set
56
- it to value of this variable
89
+ it to the value of this variable
57
90
"""
58
91
59
92
override_minimum_processing_period = None
93
+ """
94
+ ignore settings' minimum processing period and set
95
+ it to the value of this variable, for example
96
+ when some collector has a time limit on its requests
97
+ """
60
98
61
99
data = {}
100
+ # TODO: consider namespacing
62
101
"""
63
102
here you can store some information,
64
103
it will be written into and read from database
65
104
by magic, don't worry about it :)
66
- Just don't use names starting with underscore
105
+ If you're curious, see process_collector_of_state() function
106
+ from processor.py file
107
+
108
+ Don't use names starting with an underscore
67
109
like this one: _last_page
68
110
"""
0 commit comments