Skip to content

Commit 03a829b

Browse files
committed
a little optimization
1 parent 2f30c30 commit 03a829b

File tree

2 files changed

+64
-22
lines changed

2 files changed

+64
-22
lines changed

processor.py

Lines changed: 62 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -188,29 +188,69 @@ async def process_collector_of_state(self, collector_state):
188188
# TODO: save new proxies count
189189

190190
async def process_raw_proxies(self, proxies, collector_id):
191+
tasks = []
192+
191193
for proxy in proxies:
192-
self.logger.debug("adding raw proxy \"{}\" to queue".format(proxy))
193-
matches = re.match(PROXY_VALIDATE_REGEX, proxy)
194-
if matches:
195-
matches = matches.groupdict()
196-
auth_data = matches["auth_data"]
197-
domain = matches["domain"]
198-
port = matches["port"]
199-
200-
if auth_data is None:
201-
auth_data = ""
202-
203-
if domain is None or port is None:
204-
raise Exception("Bad raw proxy \"{}\" from collector \"{}\"".format(proxy, collector_id))
205-
206-
for raw_protocol in range(len(Proxy.PROTOCOLS)):
207-
await self.queue.put((
208-
raw_protocol,
209-
auth_data,
210-
domain,
211-
port,
212-
collector_id,
213-
))
194+
tasks.append(self.process_raw_proxy(proxy, collector_id))
195+
if len(tasks) > settings.CONCURRENT_TASKS_COUNT:
196+
await asyncio.wait(tasks)
197+
tasks.clear()
198+
199+
if tasks:
200+
await asyncio.wait(tasks)
201+
202+
async def process_raw_proxy(self, proxy, collector_id):
203+
self.logger.debug("adding raw proxy \"{}\" to queue".format(proxy))
204+
205+
matches = re.match(PROXY_VALIDATE_REGEX, proxy)
206+
if matches:
207+
matches = matches.groupdict()
208+
auth_data = matches["auth_data"]
209+
domain = matches["domain"]
210+
port = matches["port"]
211+
212+
if auth_data is None:
213+
auth_data = ""
214+
215+
if domain is None or port is None:
216+
self.collectors_logger.error(
217+
"Bad raw proxy \"{}\" from collector \"{}\"".format(proxy, collector_id)
218+
)
219+
return
220+
221+
# don't care about protocol
222+
try:
223+
proxy = await db.get(
224+
Proxy.select().where(
225+
Proxy.auth_data == auth_data,
226+
Proxy.domain == domain,
227+
Proxy.port == port,
228+
)
229+
)
230+
231+
if proxy.last_check_time + settings.PROXY_NOT_CHECKING_PERIOD >= time.time():
232+
proxy_short_address = ""
233+
if auth_data:
234+
proxy_short_address += "@" + auth_data
235+
236+
proxy_short_address += "{}:{}".format(domain, port)
237+
238+
self.logger.debug(
239+
"skip proxy \"{}\" from collector \"{}\"".format(
240+
proxy_short_address, collector_id)
241+
)
242+
return
243+
except Proxy.DoesNotExist:
244+
pass
245+
246+
for raw_protocol in range(len(Proxy.PROTOCOLS)):
247+
await self.queue.put((
248+
raw_protocol,
249+
auth_data,
250+
domain,
251+
port,
252+
collector_id,
253+
))
214254

215255
async def process_proxy(self, raw_protocol: int, auth_data: str, domain: str, port: int, collector_id: int):
216256
self.logger.debug(

proxy_py/_settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
DEAD_PROXY_THRESHOLD = 6
3838
REMOVE_ON_N_BAD_CHECKS = DEAD_PROXY_THRESHOLD + 64
3939
PROXY_CHECKING_TIMEOUT = 20
40+
# do not check proxy from collector if it has been checked recently
41+
PROXY_NOT_CHECKING_PERIOD = 15 * 60
4042

4143
PROXY_PROVIDER_SERVER_ADDRESS = {
4244
'HOST': 'localhost',

0 commit comments

Comments
 (0)