Skip to content

Commit 7fa49bd

Browse files
Merge pull request #3 from CdnCentreForChildProtection/fix-timeout-on-bytes
Allow configurable timeouts for the scan methods that submit large byte bodies.
2 parents a3b2999 + 258f9ca commit 7fa49bd

File tree

1 file changed

+114
-20
lines changed
  • arachnid_shield_sdk/api

1 file changed

+114
-20
lines changed

arachnid_shield_sdk/api/v1.py

Lines changed: 114 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,22 @@
1818
)
1919

2020

21+
TIMEOUT_WRITE_PERMISSIVE = httpx.Timeout(
22+
60, # Default timeout for all operations unless otherwise stated.
23+
connect=3,
24+
# Large chunks can take arbitrarily long to complete a write
25+
# so wait arbitrarily long to finish writes.
26+
write=None,
27+
)
28+
29+
TIMEOUT_READ_PERMISSIVE = httpx.Timeout(
30+
60, # Default timeout for all operations unless otherwise stated.
31+
connect=3,
32+
# Allow the server enough time to process the request and to read the response back.
33+
read=60
34+
)
35+
36+
2137
class ArachnidShield(_ArachnidShield):
2238
"""A client to communicate with the Arachnid Shield API
2339
provided by the Canadian Centre for Child Protection.
@@ -30,13 +46,20 @@ def __init__(self, username: typing.Union[str, bytes], password: typing.Union[st
3046
super().__init__(username=username, password=password)
3147
self.__client = super()._build_sync_http_client()
3248

33-
def scan_media_from_bytes(self, contents: typing.Union[bytes, io.BytesIO], mime_type: str) -> ScannedMedia:
49+
def scan_media_from_bytes(
50+
self,
51+
contents: typing.Union[bytes, io.BytesIO],
52+
mime_type: str,
53+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
54+
) -> ScannedMedia:
3455
"""Given the contents of some media, along with a mime type,
3556
scan the contents for matches against known child abuse media.
3657
3758
Args:
3859
contents: The raw bytes that represent the media.
3960
mime_type: The mimetype of the media.
61+
timeout:
62+
If provided, will set a timeout configuration for the underlying http client.
4063
4164
Returns:
4265
The record of a successful media scan.
@@ -45,10 +68,13 @@ def scan_media_from_bytes(self, contents: typing.Union[bytes, io.BytesIO], mime_
4568
`ArachnidShieldError` on a failed but complete interaction with
4669
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
4770
"""
48-
return self.scan_media_from_bytes_with_config(ScanMediaFromBytes(contents=contents, mime_type=mime_type))
71+
return self.scan_media_from_bytes_with_config(ScanMediaFromBytes(contents=contents, mime_type=mime_type), timeout=timeout)
4972

5073
def scan_media_from_file(
51-
self, filepath: pathlib.Path, mime_type_override: typing.Optional[str] = None
74+
self,
75+
filepath: pathlib.Path,
76+
mime_type_override: typing.Optional[str] = None,
77+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
5278
) -> ScannedMedia:
5379
"""Given path to the media file to scan, and an optional
5480
value for mime_type that bypasses guessing it based on the filepath,
@@ -60,6 +86,8 @@ def scan_media_from_file(
6086
mime_type_override:
6187
If provided, will use this as the mime_type
6288
instead of guessing it from the filepath.
89+
timeout:
90+
If provided, will set a timeout configuration for the underlying http client.
6391
6492
Returns:
6593
The record of a successful media scan.
@@ -78,7 +106,7 @@ def scan_media_from_file(
78106
detail=(
79107
f"Failed to identify mime_type for {filepath}. "
80108
f"You may specify it explicitly by providing "
81-
f"`force_mime_type`."
109+
f"`mime_type_override`."
82110
)
83111
)
84112
)
@@ -87,14 +115,20 @@ def scan_media_from_file(
87115
contents = f.read()
88116

89117
config = ScanMediaFromBytes(contents=contents, mime_type=mime_type)
90-
return self.scan_media_from_bytes_with_config(config)
118+
return self.scan_media_from_bytes_with_config(config, timeout=timeout)
91119

92-
def scan_media_from_url(self, url: str) -> ScannedMedia:
120+
def scan_media_from_url(
121+
self,
122+
url: str,
123+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
124+
) -> ScannedMedia:
93125
"""Given the absolute url that hosts the media we wish to scan,
94126
scan the contents of that url for matches against known harmful content.
95127
96128
Args:
97129
url: The absolute URL to scan.
130+
timeout:
131+
If provided, will set a timeout configuration for the underlying http client.
98132
99133
Returns:
100134
The record of a successful media scan.
@@ -103,14 +137,20 @@ def scan_media_from_url(self, url: str) -> ScannedMedia:
103137
`ArachnidShieldError` on a failed but complete interaction with
104138
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
105139
"""
106-
return self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url))
140+
return self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url), timeout=timeout)
107141

108-
def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> ScannedMedia:
142+
def scan_media_from_bytes_with_config(
143+
self,
144+
config: ScanMediaFromBytes,
145+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
146+
) -> ScannedMedia:
109147
"""Given the contents of some media, along with a mime type,
110148
scan the contents for matches against known child abuse media.
111149
112150
Args:
113151
config: The context that will be used to build the request.
152+
timeout:
153+
If provided, will set a timeout configuration for the underlying http client.
114154
115155
Returns:
116156
ScannedMedia: A record of a successful scan of the media.
@@ -125,6 +165,7 @@ def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> Scann
125165
url=url,
126166
headers={"Content-Type": config.mime_type},
127167
content=config.contents,
168+
timeout=timeout,
128169
)
129170

130171
if response.is_client_error or response.is_server_error:
@@ -134,12 +175,18 @@ def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> Scann
134175
response.raise_for_status()
135176
return ScannedMedia.from_dict(response.json())
136177

137-
def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMedia:
178+
def scan_media_from_url_with_config(
179+
self,
180+
config: ScanMediaFromUrl,
181+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
182+
) -> ScannedMedia:
138183
"""Given the absolute url that hosts the media we wish to scan,
139184
scan the contents of that url for matches against known harmful content.
140185
141186
Args:
142187
config: The context that will be used to build the request.
188+
timeout:
189+
If provided, will set a timeout configuration for the underlying http client.
143190
144191
Returns:
145192
ScannedMedia: A record of a successful scan of the media.
@@ -155,6 +202,7 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
155202
url=_url,
156203
headers={"Content-Type": "application/json"},
157204
json=config.to_dict(),
205+
timeout=timeout,
158206
)
159207

160208
if response.is_client_error or response.is_server_error:
@@ -164,11 +212,17 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
164212
response.raise_for_status()
165213
return ScannedMedia.from_dict(response.json())
166214

167-
def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
215+
def scan_pdq_hashes(
216+
self,
217+
config: ScanMediaFromPdq,
218+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
219+
) -> ScannedPDQHashes:
168220
"""
169221
Scan media for CSAM based on their PDQ hashes.
170222
Args:
171223
config: The context that will be used to build the request.
224+
timeout:
225+
If provided, will set a timeout configuration for the underlying http client.
172226
173227
Returns:
174228
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -183,6 +237,7 @@ def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
183237
url=_url,
184238
headers={"Content-Type": "application/json"},
185239
json=config.to_dict(),
240+
timeout=timeout,
186241
)
187242
if response.is_client_error or response.is_server_error:
188243
error_detail = ErrorDetail.from_dict(response.json())
@@ -203,13 +258,20 @@ def __init__(self, username: typing.Union[str, bytes], password: typing.Union[st
203258
super().__init__(username=username, password=password)
204259
self.__client = super()._build_async_http_client()
205260

206-
async def scan_media_from_bytes(self, contents: typing.Union[bytes, io.BytesIO], mime_type: str) -> ScannedMedia:
261+
async def scan_media_from_bytes(
262+
self,
263+
contents: typing.Union[bytes, io.BytesIO],
264+
mime_type: str,
265+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
266+
) -> ScannedMedia:
207267
"""Given the contents of some media, along with a mime type,
208268
scan the contents for matches against known child abuse media.
209269
210270
Args:
211271
contents: The raw bytes that represent the media.
212272
mime_type: The mimetype of the media.
273+
timeout:
274+
If provided, will set a timeout configuration for the underlying http client.
213275
214276
Returns:
215277
The record of a successful media scan.
@@ -219,14 +281,20 @@ async def scan_media_from_bytes(self, contents: typing.Union[bytes, io.BytesIO],
219281
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
220282
"""
221283

222-
return await self.scan_media_from_bytes_with_config(ScanMediaFromBytes(contents=contents, mime_type=mime_type))
284+
return await self.scan_media_from_bytes_with_config(ScanMediaFromBytes(contents=contents, mime_type=mime_type), timeout=timeout)
223285

224-
async def scan_media_from_url(self, url: str) -> ScannedMedia:
286+
async def scan_media_from_url(
287+
self,
288+
url: str,
289+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
290+
) -> ScannedMedia:
225291
"""Given the absolute url that hosts the media we wish to scan,
226292
scan the contents of that url for matches against known harmful content.
227293
228294
Args:
229295
url: The absolute URL to scan.
296+
timeout:
297+
If provided, will set a timeout configuration for the underlying http client.
230298
231299
Returns:
232300
The record of a successful media scan.
@@ -235,10 +303,13 @@ async def scan_media_from_url(self, url: str) -> ScannedMedia:
235303
`ArachnidShieldError` on a failed but complete interaction with
236304
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
237305
"""
238-
return await self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url))
306+
return await self.scan_media_from_url_with_config(ScanMediaFromUrl(url=url), timeout=timeout)
239307

240308
async def scan_media_from_file(
241-
self, filepath: pathlib.Path, mime_type_override: typing.Optional[str] = None
309+
self,
310+
filepath: pathlib.Path,
311+
mime_type_override: typing.Optional[str] = None,
312+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
242313
) -> ScannedMedia:
243314
"""Given path to the media file to scan, and an optional
244315
value for mime_type that bypasses guessing it based on the filepath,
@@ -250,6 +321,8 @@ async def scan_media_from_file(
250321
mime_type_override:
251322
If provided, will use this as the mime_type
252323
instead of guessing it from the filepath.
324+
timeout:
325+
If provided, will set a timeout configuration for the underlying http client.
253326
254327
Returns:
255328
The record of a successful media scan.
@@ -268,7 +341,7 @@ async def scan_media_from_file(
268341
detail=(
269342
f"Failed to identify mime_type for {filepath}. "
270343
f"You may specify it explicitly by providing "
271-
f"`force_mime_type`."
344+
f"`mime_type_override`."
272345
)
273346
)
274347
)
@@ -277,14 +350,20 @@ async def scan_media_from_file(
277350
contents = f.read()
278351

279352
config = ScanMediaFromBytes(contents=contents, mime_type=mime_type)
280-
return await self.scan_media_from_bytes_with_config(config)
353+
return await self.scan_media_from_bytes_with_config(config, timeout=timeout)
281354

282-
async def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> ScannedMedia:
355+
async def scan_media_from_bytes_with_config(
356+
self,
357+
config: ScanMediaFromBytes,
358+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
359+
) -> ScannedMedia:
283360
"""Given the contents of some media, along with a mime type,
284361
scan the contents for matches against known child abuse media.
285362
286363
Args:
287364
config: The context that will be used to build the request.
365+
timeout:
366+
If provided, will set a timeout configuration for the underlying http client.
288367
289368
Returns:
290369
ScannedMedia: A record of a successful scan of the media.
@@ -300,6 +379,7 @@ async def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) ->
300379
url=url,
301380
headers={"Content-Type": config.mime_type},
302381
content=config.contents,
382+
timeout=timeout,
303383
)
304384

305385
if response.is_client_error or response.is_server_error:
@@ -309,12 +389,18 @@ async def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) ->
309389
response.raise_for_status()
310390
return ScannedMedia.from_dict(response.json())
311391

312-
async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMedia:
392+
async def scan_media_from_url_with_config(
393+
self,
394+
config: ScanMediaFromUrl,
395+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
396+
) -> ScannedMedia:
313397
"""Given the absolute url that hosts the media we wish to scan,
314398
scan the contents of that url for matches against known harmful content.
315399
316400
Args:
317401
config: The context that will be used to build the request.
402+
timeout:
403+
If provided, will set a timeout configuration for the underlying http client.
318404
319405
Returns:
320406
ScannedMedia: A record of a successful scan of the media.
@@ -330,6 +416,7 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
330416
url=_url,
331417
headers={"Content-Type": "application/json"},
332418
json=config.to_dict(),
419+
timeout=timeout,
333420
)
334421

335422
if response.is_client_error or response.is_server_error:
@@ -339,11 +426,17 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
339426
response.raise_for_status()
340427
return ScannedMedia.from_dict(response.json())
341428

342-
async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
429+
async def scan_pdq_hashes(
430+
self,
431+
config: ScanMediaFromPdq,
432+
timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
433+
) -> ScannedPDQHashes:
343434
"""
344435
Scan media for CSAM based on their PDQ hashes.
345436
Args:
346437
config: The context that will be used to build the request.
438+
timeout:
439+
If provided, will set a timeout configuration for the underlying http client.
347440
348441
Returns:
349442
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -358,6 +451,7 @@ async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
358451
url=_url,
359452
headers={"Content-Type": "application/json"},
360453
json=config.to_dict(),
454+
timeout=timeout,
361455
)
362456
if response.is_client_error or response.is_server_error:
363457
error_detail = ErrorDetail.from_dict(response.json())

0 commit comments

Comments
 (0)