Skip to content

Commit 258f9ca

Browse files
Provide better defaults for the timeouts across all scan methods.
1 parent f0d188f commit 258f9ca

File tree

1 file changed

+76
-26
lines changed
  • arachnid_shield_sdk/api

1 file changed

+76
-26
lines changed

arachnid_shield_sdk/api/v1.py

Lines changed: 76 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,22 @@
1818
)
1919

2020

21+
TIMEOUT_WRITE_PERMISSIVE = httpx.Timeout(
22+
60, # Default timeout for all operations unless otherwise stated.
23+
connect=3,
24+
# Large chunks can take arbitrarily long to complete a write
25+
# so wait arbitrarily long to finish writes.
26+
write=None,
27+
)
28+
29+
TIMEOUT_READ_PERMISSIVE = httpx.Timeout(
30+
60, # Default timeout for all operations unless otherwise stated.
31+
connect=3,
32+
# Allow the server enough time to process the request and to read the response back.
33+
read=60
34+
)
35+
36+
2137
class ArachnidShield(_ArachnidShield):
2238
"""A client to communicate with the Arachnid Shield API
2339
provided by the Canadian Centre for Child Protection.
@@ -34,7 +50,7 @@ def scan_media_from_bytes(
3450
self,
3551
contents: typing.Union[bytes, io.BytesIO],
3652
mime_type: str,
37-
timeout: typing.Optional[httpx.Timeout] = None,
53+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
3854
) -> ScannedMedia:
3955
"""Given the contents of some media, along with a mime type,
4056
scan the contents for matches against known child abuse media.
@@ -43,8 +59,7 @@ def scan_media_from_bytes(
4359
contents: The raw bytes that represent the media.
4460
mime_type: The mimetype of the media.
4561
timeout:
46-
If provided, will set a timeout configuration for the underlying http client.
47-
Otherwise, will disable the timeout entirely.
62+
If provided, will set a timeout configuration for the underlying http client.
4863
4964
Returns:
5065
The record of a successful media scan.
@@ -59,7 +74,7 @@ def scan_media_from_file(
5974
self,
6075
filepath: pathlib.Path,
6176
mime_type_override: typing.Optional[str] = None,
62-
timeout: typing.Optional[httpx.Timeout] = None,
77+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
6378
) -> ScannedMedia:
6479
"""Given path to the media file to scan, and an optional
6580
value for mime_type that bypasses guessing it based on the filepath,
@@ -72,8 +87,7 @@ def scan_media_from_file(
7287
If provided, will use this as the mime_type
7388
instead of guessing it from the filepath.
7489
timeout:
75-
If provided, will set a timeout configuration for the underlying http client.
76-
Otherwise, will disable the timeout entirely.
90+
If provided, will set a timeout configuration for the underlying http client.
7791
7892
Returns:
7993
The record of a successful media scan.
@@ -103,12 +117,18 @@ def scan_media_from_file(
103117
config = ScanMediaFromBytes(contents=contents, mime_type=mime_type)
104118
return self.scan_media_from_bytes_with_config(config, timeout=timeout)
105119

106-
def scan_media_from_url(self, url: str) -> ScannedMedia:
120+
def scan_media_from_url(
121+
self,
122+
url: str,
123+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
124+
) -> ScannedMedia:
107125
"""Given the absolute url that hosts the media we wish to scan,
108126
scan the contents of that url for matches against known harmful content.
109127
110128
Args:
111129
url: The absolute URL to scan.
130+
timeout:
131+
If provided, will set a timeout configuration for the underlying http client.
112132
113133
Returns:
114134
The record of a successful media scan.
@@ -117,21 +137,20 @@ def scan_media_from_url(self, url: str) -> ScannedMedia:
117137
`ArachnidShieldError` on a failed but complete interaction with
118138
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
119139
"""
120-
return self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url))
140+
return self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url), timeout=timeout)
121141

122142
def scan_media_from_bytes_with_config(
123143
self,
124144
config: ScanMediaFromBytes,
125-
timeout: typing.Optional[httpx.Timeout] = httpx.Timeout(5)
145+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
126146
) -> ScannedMedia:
127147
"""Given the contents of some media, along with a mime type,
128148
scan the contents for matches against known child abuse media.
129149
130150
Args:
131151
config: The context that will be used to build the request.
132152
timeout:
133-
If provided explicitly, a configuration passed to the underlying http client.
134-
It defaults to 5 seconds, and can be disabled by setting it to `None`.
153+
If provided, will set a timeout configuration for the underlying http client.
135154
136155
Returns:
137156
ScannedMedia: A record of a successful scan of the media.
@@ -156,12 +175,18 @@ def scan_media_from_bytes_with_config(
156175
response.raise_for_status()
157176
return ScannedMedia.from_dict(response.json())
158177

159-
def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMedia:
178+
def scan_media_from_url_with_config(
179+
self,
180+
config: ScanMediaFromUrl,
181+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
182+
) -> ScannedMedia:
160183
"""Given the absolute url that hosts the media we wish to scan,
161184
scan the contents of that url for matches against known harmful content.
162185
163186
Args:
164187
config: The context that will be used to build the request.
188+
timeout:
189+
If provided, will set a timeout configuration for the underlying http client.
165190
166191
Returns:
167192
ScannedMedia: A record of a successful scan of the media.
@@ -177,6 +202,7 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
177202
url=_url,
178203
headers={"Content-Type": "application/json"},
179204
json=config.to_dict(),
205+
timeout=timeout,
180206
)
181207

182208
if response.is_client_error or response.is_server_error:
@@ -186,11 +212,17 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
186212
response.raise_for_status()
187213
return ScannedMedia.from_dict(response.json())
188214

189-
def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
215+
def scan_pdq_hashes(
216+
self,
217+
config: ScanMediaFromPdq,
218+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
219+
) -> ScannedPDQHashes:
190220
"""
191221
Scan media for CSAM based on their PDQ hashes.
192222
Args:
193223
config: The context that will be used to build the request.
224+
timeout:
225+
If provided, will set a timeout configuration for the underlying http client.
194226
195227
Returns:
196228
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -205,6 +237,7 @@ def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
205237
url=_url,
206238
headers={"Content-Type": "application/json"},
207239
json=config.to_dict(),
240+
timeout=timeout,
208241
)
209242
if response.is_client_error or response.is_server_error:
210243
error_detail = ErrorDetail.from_dict(response.json())
@@ -229,7 +262,7 @@ async def scan_media_from_bytes(
229262
self,
230263
contents: typing.Union[bytes, io.BytesIO],
231264
mime_type: str,
232-
timeout: typing.Optional[httpx.Timeout] = None,
265+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
233266
) -> ScannedMedia:
234267
"""Given the contents of some media, along with a mime type,
235268
scan the contents for matches against known child abuse media.
@@ -238,8 +271,7 @@ async def scan_media_from_bytes(
238271
contents: The raw bytes that represent the media.
239272
mime_type: The mimetype of the media.
240273
timeout:
241-
If provided, will set a timeout configuration for the underlying http client.
242-
Otherwise, will disable the timeout entirely.
274+
If provided, will set a timeout configuration for the underlying http client.
243275
244276
Returns:
245277
The record of a successful media scan.
@@ -251,12 +283,18 @@ async def scan_media_from_bytes(
251283

252284
return await self.scan_media_from_bytes_with_config(ScanMediaFromBytes(contents=contents, mime_type=mime_type), timeout=timeout)
253285

254-
async def scan_media_from_url(self, url: str) -> ScannedMedia:
286+
async def scan_media_from_url(
287+
self,
288+
url: str,
289+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
290+
) -> ScannedMedia:
255291
"""Given the absolute url that hosts the media we wish to scan,
256292
scan the contents of that url for matches against known harmful content.
257293
258294
Args:
259295
url: The absolute URL to scan.
296+
timeout:
297+
If provided, will set a timeout configuration for the underlying http client.
260298
261299
Returns:
262300
The record of a successful media scan.
@@ -265,13 +303,13 @@ async def scan_media_from_url(self, url: str) -> ScannedMedia:
265303
`ArachnidShieldError` on a failed but complete interaction with
266304
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
267305
"""
268-
return await self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url))
306+
return await self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url), timeout=timeout)
269307

270308
async def scan_media_from_file(
271309
self,
272310
filepath: pathlib.Path,
273311
mime_type_override: typing.Optional[str] = None,
274-
timeout: typing.Optional[httpx.Timeout] = None,
312+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
275313
) -> ScannedMedia:
276314
"""Given path to the media file to scan, and an optional
277315
value for mime_type that bypasses guessing it based on the filepath,
@@ -284,8 +322,7 @@ async def scan_media_from_file(
284322
If provided, will use this as the mime_type
285323
instead of guessing it from the filepath.
286324
timeout:
287-
If provided, will set a timeout configuration for the underlying http client.
288-
Otherwise, will disable the timeout entirely.
325+
If provided, will set a timeout configuration for the underlying http client.
289326
290327
Returns:
291328
The record of a successful media scan.
@@ -318,16 +355,15 @@ async def scan_media_from_file(
318355
async def scan_media_from_bytes_with_config(
319356
self,
320357
config: ScanMediaFromBytes,
321-
timeout: typing.Optional[httpx.Timeout] = httpx.Timeout(5)
358+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
322359
) -> ScannedMedia:
323360
"""Given the contents of some media, along with a mime type,
324361
scan the contents for matches against known child abuse media.
325362
326363
Args:
327364
config: The context that will be used to build the request.
328365
timeout:
329-
If provided explicitly, a configuration passed to the underlying http client.
330-
It defaults to 5 seconds, and can be disabled by setting it to `None`.
366+
If provided, will set a timeout configuration for the underlying http client.
331367
332368
Returns:
333369
ScannedMedia: A record of a successful scan of the media.
@@ -353,12 +389,18 @@ async def scan_media_from_bytes_with_config(
353389
response.raise_for_status()
354390
return ScannedMedia.from_dict(response.json())
355391

356-
async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMedia:
392+
async def scan_media_from_url_with_config(
393+
self,
394+
config: ScanMediaFromUrl,
395+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
396+
) -> ScannedMedia:
357397
"""Given the absolute url that hosts the media we wish to scan,
358398
scan the contents of that url for matches against known harmful content.
359399
360400
Args:
361401
config: The context that will be used to build the request.
402+
timeout:
403+
If provided, will set a timeout configuration for the underlying http client.
362404
363405
Returns:
364406
ScannedMedia: A record of a successful scan of the media.
@@ -374,6 +416,7 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
374416
url=_url,
375417
headers={"Content-Type": "application/json"},
376418
json=config.to_dict(),
419+
timeout=timeout,
377420
)
378421

379422
if response.is_client_error or response.is_server_error:
@@ -383,11 +426,17 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
383426
response.raise_for_status()
384427
return ScannedMedia.from_dict(response.json())
385428

386-
async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
429+
async def scan_pdq_hashes(
430+
self,
431+
config: ScanMediaFromPdq,
432+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
433+
) -> ScannedPDQHashes:
387434
"""
388435
Scan media for CSAM based on their PDQ hashes.
389436
Args:
390437
config: The context that will be used to build the request.
438+
timeout:
439+
If provided, will set a timeout configuration for the underlying http client.
391440
392441
Returns:
393442
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -402,6 +451,7 @@ async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
402451
url=_url,
403452
headers={"Content-Type": "application/json"},
404453
json=config.to_dict(),
454+
timeout=timeout,
405455
)
406456
if response.is_client_error or response.is_server_error:
407457
error_detail = ErrorDetail.from_dict(response.json())

0 commit comments

Comments
 (0)