@@ -26,6 +26,13 @@ class Processor:
26
26
"""
27
27
instance = None
28
28
29
+ @staticmethod
30
+ def get_instance ():
31
+ if Processor .instance is None :
32
+ Processor .instance = Processor ()
33
+
34
+ return Processor .instance
35
+
29
36
def __init__ (self ):
30
37
self .logger = logging .getLogger ("proxy_py/processor" )
31
38
@@ -55,14 +62,7 @@ def __init__(self):
55
62
self .logger .debug ("processor initialization..." )
56
63
57
64
self .queue = asyncio .Queue (maxsize = settings .PROXY_QUEUE_SIZE )
58
- self .proxies_semaphore = asyncio .BoundedSemaphore (settings .CONCURRENT_TASKS_COUNT )
59
-
60
- @staticmethod
61
- def get_instance ():
62
- if Processor .instance is None :
63
- Processor .instance = Processor ()
64
-
65
- return Processor .instance
65
+ self .proxies_semaphore = asyncio .BoundedSemaphore (settings .NUMBER_OF_CONCURRENT_TASKS )
66
66
67
67
async def worker (self ):
68
68
await asyncio .gather (* [
@@ -89,19 +89,19 @@ async def consumer(self):
89
89
90
90
async def producer (self ):
91
91
while True :
92
- await asyncio .sleep (0.000001 )
92
+ await asyncio .sleep (0.00001 )
93
93
try :
94
94
# check good proxies
95
95
proxies = await db .execute (
96
96
Proxy .select ().where (
97
97
Proxy .number_of_bad_checks == 0 ,
98
98
Proxy .last_check_time < time .time () - Proxy .checking_period ,
99
- ).order_by (Proxy .last_check_time ).limit (settings .CONCURRENT_TASKS_COUNT )
99
+ ).order_by (Proxy .last_check_time ).limit (settings .NUMBER_OF_CONCURRENT_TASKS )
100
100
)
101
101
102
102
await self .add_proxies_to_queue (proxies )
103
103
104
- if len (proxies ) > settings .CONCURRENT_TASKS_COUNT / 2 :
104
+ if len (proxies ) > settings .NUMBER_OF_CONCURRENT_TASKS / 2 :
105
105
continue
106
106
107
107
# check collectors
@@ -127,7 +127,7 @@ async def producer(self):
127
127
Proxy .number_of_bad_checks > 0 ,
128
128
Proxy .number_of_bad_checks < settings .DEAD_PROXY_THRESHOLD ,
129
129
Proxy .last_check_time < time .time () - settings .BAD_PROXY_CHECKING_PERIOD ,
130
- ).order_by (Proxy .last_check_time ).limit (settings .CONCURRENT_TASKS_COUNT )
130
+ ).order_by (Proxy .last_check_time ).limit (settings .NUMBER_OF_CONCURRENT_TASKS )
131
131
)
132
132
133
133
await self .add_proxies_to_queue (proxies )
@@ -141,7 +141,7 @@ async def producer(self):
141
141
Proxy .number_of_bad_checks >= settings .DEAD_PROXY_THRESHOLD ,
142
142
Proxy .number_of_bad_checks < settings .DO_NOT_CHECK_ON_N_BAD_CHECKS ,
143
143
Proxy .last_check_time < time .time () - settings .DEAD_PROXY_CHECKING_PERIOD ,
144
- ).order_by (Proxy .last_check_time ).limit (settings .CONCURRENT_TASKS_COUNT )
144
+ ).order_by (Proxy .last_check_time ).limit (settings .NUMBER_OF_CONCURRENT_TASKS )
145
145
)
146
146
147
147
await self .add_proxies_to_queue (proxies )
@@ -154,87 +154,8 @@ async def producer(self):
154
154
155
155
await asyncio .sleep (settings .SLEEP_AFTER_ERROR_PERIOD )
156
156
157
- # async def process_collectors(self):
158
- # while True:
159
- # await asyncio.sleep(0.001)
160
- # try:
161
- # if not self.is_queue_free():
162
- # continue
163
- #
164
- # # check collectors
165
- # collector_states = await db.execute(
166
- # CollectorState.select().where(
167
- # CollectorState.last_processing_time < time.time() - CollectorState.processing_period
168
- # ).limit(settings.CONCURRENT_TASKS_COUNT)
169
- # )
170
- #
171
- # tasks = [
172
- # self.process_collector_of_state(collector_state)
173
- # for collector_state in collector_states
174
- # ]
175
- #
176
- # if tasks:
177
- # await asyncio.gather(*tasks)
178
- # tasks.clear()
179
- #
180
- # # await self.queue.join()
181
- # except KeyboardInterrupt as ex:
182
- # raise ex
183
- # except BaseException as ex:
184
- # self.logger.exception(ex)
185
- # await asyncio.sleep(settings.SLEEP_AFTER_ERROR_PERIOD)
186
-
187
157
def is_queue_free (self ):
188
- return self .queue .qsize () < settings .CONCURRENT_TASKS_COUNT
189
-
190
- # async def process_proxies(self):
191
- # while True:
192
- # await asyncio.sleep(0.001)
193
- # try:
194
- # # check good proxies
195
- # proxies = await db.execute(
196
- # Proxy.select().where(
197
- # Proxy.number_of_bad_checks == 0,
198
- # Proxy.last_check_time < time.time() - Proxy.checking_period,
199
- # ).order_by(Proxy.last_check_time).limit(settings.CONCURRENT_TASKS_COUNT)
200
- # )
201
- #
202
- # await self.add_proxies_to_queue(proxies)
203
- #
204
- # if len(proxies) > 0 or not self.is_queue_free():
205
- # continue
206
- #
207
- # # check bad proxies
208
- # proxies = await db.execute(
209
- # Proxy.select().where(
210
- # Proxy.number_of_bad_checks > 0,
211
- # Proxy.number_of_bad_checks < settings.DEAD_PROXY_THRESHOLD,
212
- # Proxy.last_check_time < time.time() - settings.BAD_PROXY_CHECKING_PERIOD,
213
- # ).order_by(Proxy.last_check_time).limit(settings.CONCURRENT_TASKS_COUNT)
214
- # )
215
- #
216
- # await self.add_proxies_to_queue(proxies)
217
- #
218
- # if len(proxies) > 0 or not self.is_queue_free():
219
- # continue
220
- #
221
- # # check dead proxies
222
- # proxies = await db.execute(
223
- # Proxy.select().where(
224
- # Proxy.number_of_bad_checks >= settings.DEAD_PROXY_THRESHOLD,
225
- # Proxy.number_of_bad_checks < settings.REMOVE_ON_N_BAD_CHECKS,
226
- # Proxy.last_check_time < time.time() - settings.DEAD_PROXY_CHECKING_PERIOD,
227
- # ).order_by(Proxy.last_check_time).limit(settings.CONCURRENT_TASKS_COUNT)
228
- # )
229
- #
230
- # await self.add_proxies_to_queue(proxies)
231
- # except KeyboardInterrupt as ex:
232
- # raise ex
233
- # except BaseException as ex:
234
- # self.logger.exception(ex)
235
- # if settings.DEBUG:
236
- # raise ex
237
- # await asyncio.sleep(10)
158
+ return self .queue .qsize () < settings .NUMBER_OF_CONCURRENT_TASKS
238
159
239
160
async def add_proxy_to_queue (self , proxy : Proxy ):
240
161
await self .queue .put ((
@@ -250,8 +171,6 @@ async def add_proxies_to_queue(self, proxies: list):
250
171
await self .add_proxy_to_queue (proxy )
251
172
252
173
async def process_collector_of_state (self , collector_state ):
253
- proxies = set ()
254
-
255
174
collector = await collectors_list .load_collector (collector_state )
256
175
try :
257
176
self .logger .debug (
@@ -277,16 +196,15 @@ async def process_collector_of_state(self, collector_state):
277
196
self .collectors_logger .exception (ex )
278
197
finally :
279
198
collector .last_processing_time = int (time .time ())
280
- await collector .save_state (collector_state )
281
- # TODO: save new proxies count
282
- await db .update (collector_state )
199
+ # TODO: new proxies count
200
+ await collectors_list .save_collector (collector_state )
283
201
284
202
async def process_raw_proxies (self , proxies , collector_id ):
285
203
tasks = []
286
204
287
205
for proxy in proxies :
288
206
tasks .append (self .process_raw_proxy (proxy , collector_id ))
289
- if len (tasks ) > settings .CONCURRENT_TASKS_COUNT :
207
+ if len (tasks ) > settings .NUMBER_OF_CONCURRENT_TASKS :
290
208
await asyncio .gather (* tasks )
291
209
tasks .clear ()
292
210
0 commit comments