|
30 | 30 | from pathlib import Path
|
31 | 31 | from typing import List
|
32 | 32 |
|
33 |
| -import asyncio |
34 | 33 | from tenacity import retry, stop_after_attempt, wait_random_exponential
|
35 | 34 |
|
36 | 35 | import aioboto3
|
| 36 | +from boto3.s3.transfer import TransferConfig |
37 | 37 |
|
38 | 38 | from servicex.models import ResultFile, TransformStatus
|
39 | 39 |
|
40 |
| -_semaphore = asyncio.Semaphore(8) |
| 40 | +_transferconfig = TransferConfig(max_concurrency=10) |
41 | 41 |
|
42 | 42 |
|
43 |
| -def init_download_semaphore(concurrency: int = 8): |
| 43 | +def init_s3_config(concurrency: int = 10): |
44 | 44 | "Update the number of concurrent connections"
|
45 |
| - global _semaphore |
46 |
| - _semaphore = asyncio.Semaphore(concurrency) |
| 45 | + global _transferconfig |
| 46 | + _transferconfig = TransferConfig(max_concurrency=concurrency) |
47 | 47 |
|
48 | 48 |
|
49 | 49 | def _sanitize_filename(fname: str):
|
@@ -114,31 +114,29 @@ async def download_file(
|
114 | 114 | )
|
115 | 115 | )
|
116 | 116 |
|
117 |
| - async with _semaphore: |
118 |
| - async with self.minio.resource("s3", endpoint_url=self.endpoint_host) as s3: |
119 |
| - obj = await s3.Object(self.bucket, object_name) |
120 |
| - remotesize = await obj.content_length |
121 |
| - if path.exists(): |
122 |
| - # if file size is the same, let's not download anything |
123 |
| - # maybe move to a better verification mechanism with e-tags in the future |
124 |
| - localsize = path.stat().st_size |
125 |
| - if localsize == remotesize: |
126 |
| - return path.resolve() |
127 |
| - await obj.download_file(path.as_posix()) |
| 117 | + async with self.minio.resource("s3", endpoint_url=self.endpoint_host) as s3: |
| 118 | + obj = await s3.Object(self.bucket, object_name) |
| 119 | + remotesize = await obj.content_length |
| 120 | + if path.exists(): |
| 121 | + # if file size is the same, let's not download anything |
| 122 | + # maybe move to a better verification mechanism with e-tags in the future |
128 | 123 | localsize = path.stat().st_size
|
129 |
| - if localsize != remotesize: |
130 |
| - raise RuntimeError(f"Download of {object_name} failed") |
| 124 | + if localsize == remotesize: |
| 125 | + return path.resolve() |
| 126 | + await obj.download_file(path.as_posix(), Config=_transferconfig) |
| 127 | + localsize = path.stat().st_size |
| 128 | + if localsize != remotesize: |
| 129 | + raise RuntimeError(f"Download of {object_name} failed") |
131 | 130 | return path.resolve()
|
132 | 131 |
|
133 | 132 | @retry(
|
134 | 133 | stop=stop_after_attempt(3), wait=wait_random_exponential(max=60), reraise=True
|
135 | 134 | )
|
136 | 135 | async def get_signed_url(self, object_name: str) -> str:
|
137 |
| - async with _semaphore: |
138 |
| - async with self.minio.client("s3", endpoint_url=self.endpoint_host) as s3: |
139 |
| - return await s3.generate_presigned_url( |
140 |
| - "get_object", Params={"Bucket": self.bucket, "Key": object_name} |
141 |
| - ) |
| 136 | + async with self.minio.client("s3", endpoint_url=self.endpoint_host) as s3: |
| 137 | + return await s3.generate_presigned_url( |
| 138 | + "get_object", Params={"Bucket": self.bucket, "Key": object_name} |
| 139 | + ) |
142 | 140 |
|
143 | 141 | @classmethod
|
144 | 142 | def hash_path(cls, file_name):
|
|
0 commit comments