Skip to content

Commit 32a640f

Browse files
committed
migrate to python3.6
1 parent 301524d commit 32a640f

File tree

5 files changed

+45 additions, -42 deletions

checkers/base_checker.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,10 @@
22
from proxy_py import settings
33

44
import ssl
5+
import aiohttp
56
import aiosocks
67
import asyncio
78
import async_requests
8-
import aiohttp
99

1010

1111
class CheckerResult:

collectors/abstract_collector.py

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
# TODO: add wrapper for doing requests and saving its cookies and UserAgent
2+
import asyncio
3+
24
from proxy_py import settings
35

46
import json
@@ -35,20 +37,23 @@ async def collect(self):
3537
return []
3638

3739
async def _collect(self):
38-
"""Do not use! It is called on collector's processing automatically"""
39-
40-
# TODO: uncomment when python 3.6 comes to ubuntu lts
41-
# i = 0
42-
# async for proxy in self.collect():
43-
# if i > settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST:
44-
# break
45-
46-
# yield proxy
47-
# i += 1
48-
proxies = list(await self.collect())
49-
proxies = proxies[:settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST]
50-
self.last_processing_proxies_count = len(proxies)
51-
return proxies
40+
"""Do not call yourself! It is called on collector's processing automatically"""
41+
collect = self.collect()
42+
if asyncio.iscoroutine(collect):
43+
async def wrapper(f):
44+
for item in (await f):
45+
yield item
46+
collect = wrapper(collect)
47+
48+
i = 0
49+
async for proxy in collect:
50+
if i > settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST:
51+
break
52+
53+
yield proxy
54+
i += 1
55+
56+
self.last_processing_proxies_count = i
5257

5358
async def load_state(self, state: models.CollectorState):
5459
"""

processor.py

Lines changed: 18 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -164,17 +164,28 @@ async def process_collector_of_state(self, collector_state):
164164
self.logger.debug(
165165
"start processing collector of type \"{}\"".format(type(collector))
166166
)
167-
proxies = await collector._collect()
168167

169-
if proxies:
170-
self.logger.debug(
171-
"got {} proxies from collector of type \"{}\"".format(len(proxies), type(collector))
172-
)
173-
await self.process_raw_proxies(proxies, collector_state.id)
174-
else:
168+
tasks = []
169+
number_of_proxies = 0
170+
async for proxy in collector._collect():
171+
number_of_proxies += 1
172+
tasks.append(self.process_raw_proxy(proxy, collector_state.id))
173+
174+
if len(tasks) > settings.NUMBER_OF_CONCURRENT_TASKS:
175+
await asyncio.gather(*tasks)
176+
tasks.clear()
177+
178+
if tasks:
179+
await asyncio.gather(*tasks)
180+
181+
if number_of_proxies == 0:
175182
self.collectors_logger.warning(
176183
"got 0 proxies from collector of type \"{}\"".format(type(collector))
177184
)
185+
else:
186+
self.collectors_logger.info(
187+
f"got {number_of_proxies} proxies from collector of type \"{type(collector)}\""
188+
)
178189
except KeyboardInterrupt as ex:
179190
raise ex
180191
except BaseException as ex:
@@ -187,19 +198,6 @@ async def process_collector_of_state(self, collector_state):
187198
# TODO: new proxies count
188199
await collectors_list.save_collector(collector_state)
189200

190-
async def process_raw_proxies(self, proxies, collector_id):
191-
tasks = []
192-
193-
for proxy in proxies:
194-
# TODO: refactor it
195-
tasks.append(self.process_raw_proxy(proxy, collector_id))
196-
if len(tasks) > settings.NUMBER_OF_CONCURRENT_TASKS:
197-
await asyncio.gather(*tasks)
198-
tasks.clear()
199-
200-
if tasks:
201-
await asyncio.gather(*tasks)
202-
203201
async def process_raw_proxy(self, proxy, collector_id):
204202
self.logger.debug("processing raw proxy \"{}\"".format(proxy))
205203

proxy_py/_settings.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,9 @@
1111

1212
DATABASE_CONNECTION_ARGS = ()
1313
DATABASE_CONNECTION_KWARGS = {
14-
'database': 'test',
15-
'user': 'test',
16-
'password': 'test',
14+
'database': 'proxy_py',
15+
'user': 'proxy_py',
16+
'password': 'proxy_py',
1717
'max_connections': 20,
1818
}
1919

@@ -32,11 +32,11 @@
3232
# 'local/collectors', # use to add your own collectors
3333
]
3434

35-
NUMBER_OF_CONCURRENT_TASKS = 64
35+
NUMBER_OF_CONCURRENT_TASKS = 128
3636
# makes aiohttp to not send more
3737
# than this number of simultaneous requests
3838
# works by common connector
39-
NUMBER_OF_SIMULTANEOUS_REQUESTS = 64
39+
NUMBER_OF_SIMULTANEOUS_REQUESTS = 128
4040
# the same, but per host
4141
NUMBER_OF_SIMULTANEOUS_REQUESTS_PER_HOST = NUMBER_OF_SIMULTANEOUS_REQUESTS
4242

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
aiohttp==2.3.10
2-
aiosocks==0.2.5
2+
aiohttp-jinja2==0.16.0
3+
aiosocks
34
lxml
45
fake-useragent
5-
aiohttp_jinja2
66
jinja2
77
peewee-async
88
aiopg

0 commit comments

Comments
 (0)