@@ -68,6 +68,7 @@ def __init__(self):
68
68
69
69
self .queue = asyncio .Queue (maxsize = settings .PROXY_QUEUE_SIZE )
70
70
self .proxies_semaphore = asyncio .BoundedSemaphore (settings .NUMBER_OF_CONCURRENT_TASKS )
71
+ self .good_proxies_are_processed = False
71
72
72
73
async def worker (self ):
73
74
await asyncio .gather (* [
@@ -93,6 +94,13 @@ async def consumer(self):
93
94
await asyncio .sleep (settings .SLEEP_AFTER_ERROR_PERIOD )
94
95
95
96
async def producer(self):
    """Run the proxy-checking and collector-processing loops side by side.

    Both ``process_proxies`` and ``process_collectors`` are awaited
    concurrently; whenever both of them finish, they are started again.
    An exception raised by either one propagates to the caller.
    """
    while True:
        # Proxies and collectors are scheduled independently of each other.
        await asyncio.gather(
            self.process_proxies(),
            self.process_collectors(),
        )
102
+
103
+ async def process_proxies (self ):
96
104
while True :
97
105
await asyncio .sleep (0.00001 )
98
106
try :
@@ -103,28 +111,15 @@ async def producer(self):
103
111
Proxy .last_check_time < time .time () - Proxy .checking_period ,
104
112
).order_by (Proxy .last_check_time ).limit (settings .NUMBER_OF_CONCURRENT_TASKS )
105
113
)
114
+ if proxies :
115
+ self .good_proxies_are_processed = False
106
116
107
117
await self .add_proxies_to_queue (proxies )
108
118
109
- if len ( proxies ) > settings . NUMBER_OF_CONCURRENT_TASKS / 2 :
119
+ if proxies :
110
120
continue
111
121
112
- # check collectors
113
- collector_states = await db .execute (
114
- CollectorState .select ().where (
115
- CollectorState .last_processing_time < time .time () - CollectorState .processing_period
116
- ).order_by (peewee .fn .Random ()).
117
- limit (settings .NUMBER_OF_CONCURRENT_COLLECTORS )
118
- )
119
-
120
- tasks = [
121
- self .process_collector_of_state (collector_state )
122
- for collector_state in collector_states
123
- ]
124
-
125
- if tasks :
126
- await asyncio .gather (* tasks )
127
- continue
122
+ self .good_proxies_are_processed = True
128
123
129
124
# check bad proxies
130
125
proxies = await db .execute (
@@ -159,6 +154,31 @@ async def producer(self):
159
154
160
155
await asyncio .sleep (settings .SLEEP_AFTER_ERROR_PERIOD )
161
156
157
async def process_collectors(self):
    """Continuously run every collector whose processing period has elapsed.

    Each pass selects at most ``settings.NUMBER_OF_CONCURRENT_COLLECTORS``
    collector states, in random order, whose ``last_processing_time`` is
    older than their ``processing_period``, and processes them concurrently
    through ``process_collector_of_state``. The loop never returns by itself.

    Raises:
        KeyboardInterrupt: always propagated.
        asyncio.CancelledError: always propagated, so that cancelling the
            task running this coroutine actually stops the loop.
        BaseException: any other error is logged with
            ``collectors_logger`` and re-raised only when
            ``settings.DEBUG`` is truthy.
    """
    while True:
        try:
            # Give other tasks a chance to run before the next pass.
            await asyncio.sleep(0.000001)

            # check collectors
            collector_states = await db.execute(
                CollectorState.select().where(
                    CollectorState.last_processing_time < time.time() - CollectorState.processing_period
                ).order_by(peewee.fn.Random()).limit(settings.NUMBER_OF_CONCURRENT_COLLECTORS)
            )

            await asyncio.gather(*[
                self.process_collector_of_state(collector_state)
                for collector_state in collector_states
            ])
        except (KeyboardInterrupt, asyncio.CancelledError):
            # CancelledError derives from BaseException on modern Python, so
            # without this clause the broad handler below would log and
            # swallow a cancellation request and keep looping.
            raise
        except BaseException as ex:
            self.collectors_logger.exception(ex)
            if settings.DEBUG:
                raise

            # NOTE(review): the back-off is assumed to belong to the error
            # path (as the setting's name suggests) - confirm against the
            # original indentation.
            await asyncio.sleep(settings.SLEEP_AFTER_ERROR_PERIOD)
181
+
162
182
def is_queue_free(self):
    """Tell whether the queue holds fewer items than the concurrency limit.

    Returns:
        True when ``self.queue.qsize()`` is strictly below
        ``settings.NUMBER_OF_CONCURRENT_TASKS``, False otherwise.
    """
    pending = self.queue.qsize()
    limit = settings.NUMBER_OF_CONCURRENT_TASKS
    return pending < limit
164
184
@@ -183,15 +203,15 @@ async def process_collector_of_state(self, collector_state):
183
203
)
184
204
proxies = await collector ._collect ()
185
205
186
- if not proxies :
187
- self .collectors_logger .warning (
188
- "got 0 proxies from collector of type \" {}\" " .format (type (collector ))
189
- )
190
- else :
206
+ if proxies :
191
207
self .logger .debug (
192
208
"got {} proxies from collector of type \" {}\" " .format (len (proxies ), type (collector ))
193
209
)
194
210
await self .process_raw_proxies (proxies , collector_state .id )
211
+ else :
212
+ self .collectors_logger .warning (
213
+ "got 0 proxies from collector of type \" {}\" " .format (type (collector ))
214
+ )
195
215
except KeyboardInterrupt as ex :
196
216
raise ex
197
217
except BaseException as ex :
@@ -208,6 +228,7 @@ async def process_raw_proxies(self, proxies, collector_id):
208
228
tasks = []
209
229
210
230
for proxy in proxies :
231
+ # TODO: refactor it
211
232
tasks .append (self .process_raw_proxy (proxy , collector_id ))
212
233
if len (tasks ) > settings .NUMBER_OF_CONCURRENT_TASKS :
213
234
await asyncio .gather (* tasks )
@@ -261,6 +282,9 @@ async def process_raw_proxy(self, proxy, collector_id):
261
282
pass
262
283
263
284
for raw_protocol in range (len (Proxy .PROTOCOLS )):
285
+ while not self .good_proxies_are_processed :
286
+ await asyncio .sleep (0.01 )
287
+
264
288
await self .queue .put ((
265
289
raw_protocol ,
266
290
auth_data ,
0 commit comments