|
| 1 | +import os.path |
| 2 | +import urllib.parse |
| 3 | + |
| 4 | +import aiohttp |
| 5 | +import asyncio |
| 6 | +import aiofiles |
| 7 | + |
| 8 | + |
def _get_temp_filepath_from_url(url, dirpath):
    """Return a file path inside *dirpath* named after the last URL path segment.

    Only the path component of *url* is considered; the query string and
    fragment are dropped by ``urllib.parse.urlsplit``. The segment is used
    as-is (no percent-decoding is applied).
    """
    parsed_url = urllib.parse.urlsplit(url)
    # os.path.split returns (head, tail); the tail is the final path segment
    _, filename = os.path.split(parsed_url.path)
    return os.path.join(dirpath, filename)
| 14 | + |
| 15 | + |
async def async_write(path, binary):
    """Asynchronously write *binary* to the file at *path*.

    The file is opened through ``aiofiles`` in ``"wb"`` mode, so an existing
    file at *path* is overwritten.
    """
    async with aiofiles.open(path, "wb") as outfile:
        await outfile.write(binary)
| 19 | + |
| 20 | + |
async def async_fetch(session, url):
    """Issue a GET request for *url* on *session* and return the outcome.

    Returns a ``(url, status, binary)`` tuple. ``binary`` holds the response
    body when the status is 200; for any other status the body is not read
    and ``binary`` is ``None``.
    """
    async with session.get(url) as response:
        status = response.status
        # The body is only read for a 200 response
        binary = None if status != 200 else (await response.read())
        return url, status, binary
| 29 | + |
| 30 | + |
async def async_fetch_and_write(session, url, dirpath):
    """Download *url* with *session* and save the body in *dirpath*.

    Returns a ``(url, filepath, success)`` tuple. When the response status
    is not 200 nothing is written and the result is ``(url, None, False)``;
    otherwise the body is written to a path derived from the URL and the
    result is ``(url, filepath, True)``.
    """
    url, status, binary = await async_fetch(session, url)

    # Guard clause: anything other than a 200 response is a failed download
    if status != 200:
        return url, None, False

    filepath = _get_temp_filepath_from_url(url, dirpath)
    await async_write(filepath, binary)
    return url, filepath, True
| 40 | + |
| 41 | + |
async def create_async_get_request_session_and_run(urls, dirpath):
    """Download every URL in *urls* into *dirpath* using one shared session.

    A single ``aiohttp.ClientSession`` is opened, one task per URL is
    scheduled, and all tasks are awaited together. The list of task objects
    (not their results) is returned, in the same order as *urls*. Because
    ``gather`` is called with ``return_exceptions=True``, an exception raised
    by one download is collected instead of being raised from the gather call.
    """
    async with aiohttp.ClientSession() as session:
        # asyncio.ensure_future is used here to stay compatible with Python 3.6
        tasks = [
            asyncio.ensure_future(async_fetch_and_write(session, url, dirpath))
            for url in urls
        ]
        await asyncio.gather(*tasks, return_exceptions=True)
        return tasks
0 commit comments