18
18
)
19
19
20
20
21
# Timeout profile that never times out while the request body is being sent.
TIMEOUT_WRITE_PERMISSIVE = httpx.Timeout(
    timeout=60,  # Baseline for every phase that is not overridden below.
    connect=3,
    # Writing a large chunk can take arbitrarily long to complete,
    # so place no limit on how long a write may take.
    write=None,
)
28
+
29
# Timeout profile that spells out the read limit explicitly.
TIMEOUT_READ_PERMISSIVE = httpx.Timeout(
    timeout=60,  # Baseline for every phase that is not overridden below.
    connect=3,
    # Give the server enough time to process the request and for the
    # response to be read back.
    read=60,
)
35
+
36
+
21
37
class ArachnidShield (_ArachnidShield ):
22
38
"""A client to communicate with the Arachnid Shield API
23
39
provided by the Canadian Centre for Child Protection.
@@ -34,7 +50,7 @@ def scan_media_from_bytes(
34
50
self ,
35
51
contents : typing .Union [bytes , io .BytesIO ],
36
52
mime_type : str ,
37
- timeout : typing .Optional [httpx .Timeout ] = None ,
53
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
38
54
) -> ScannedMedia :
39
55
"""Given the contents of some media, along with a mime type,
40
56
scan the contents for matches against known child abuse media.
@@ -43,8 +59,7 @@ def scan_media_from_bytes(
43
59
contents: The raw bytes that represent the media.
44
60
mime_type: The mimetype of the media.
45
61
timeout:
46
- If provided, will set a timeout configuration for the underlying http client.
47
- Otherwise, will disable the timeout entirely.
62
+ If provided, will set a timeout configuration for the underlying http client.
48
63
49
64
Returns:
50
65
The record of a successful media scan.
@@ -59,7 +74,7 @@ def scan_media_from_file(
59
74
self ,
60
75
filepath : pathlib .Path ,
61
76
mime_type_override : typing .Optional [str ] = None ,
62
- timeout : typing .Optional [httpx .Timeout ] = None ,
77
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
63
78
) -> ScannedMedia :
64
79
"""Given path to the media file to scan, and an optional
65
80
value for mime_type that bypasses guessing it based on the filepath,
@@ -72,8 +87,7 @@ def scan_media_from_file(
72
87
If provided, will use this as the mime_type
73
88
instead of guessing it from the filepath.
74
89
timeout:
75
- If provided, will set a timeout configuration for the underlying http client.
76
- Otherwise, will disable the timeout entirely.
90
+ If provided, will set a timeout configuration for the underlying http client.
77
91
78
92
Returns:
79
93
The record of a successful media scan.
@@ -103,12 +117,18 @@ def scan_media_from_file(
103
117
config = ScanMediaFromBytes (contents = contents , mime_type = mime_type )
104
118
return self .scan_media_from_bytes_with_config (config , timeout = timeout )
105
119
106
- def scan_media_from_url (self , url : str ) -> ScannedMedia :
120
+ def scan_media_from_url (
121
+ self ,
122
+ url : str ,
123
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
124
+ ) -> ScannedMedia :
107
125
"""Given the absolute url that hosts the media we wish to scan,
108
126
scan the contents of that url for matches against known harmful content.
109
127
110
128
Args:
111
129
url: The absolute URL to scan.
130
+ timeout:
131
+ If provided, will set a timeout configuration for the underlying http client.
112
132
113
133
Returns:
114
134
The record of a successful media scan.
@@ -117,21 +137,20 @@ def scan_media_from_url(self, url: str) -> ScannedMedia:
117
137
`ArachnidShieldError` on a failed but complete interaction with
118
138
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
119
139
"""
120
- return self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ))
140
+ return self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ), timeout = timeout )
121
141
122
142
def scan_media_from_bytes_with_config (
123
143
self ,
124
144
config : ScanMediaFromBytes ,
125
- timeout : typing .Optional [httpx .Timeout ] = httpx . Timeout ( 5 )
145
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
126
146
) -> ScannedMedia :
127
147
"""Given the contents of some media, along with a mime type,
128
148
scan the contents for matches against known child abuse media.
129
149
130
150
Args:
131
151
config: The context that will be used to build the request.
132
152
timeout:
133
- If provided explicitly, a configuration passed to the underlying http client.
134
- It defaults to 5 seconds, and can be disabled by setting it to `None`.
153
+ If provided, will set a timeout configuration for the underlying http client.
135
154
136
155
Returns:
137
156
ScannedMedia: A record of a successful scan of the media.
@@ -156,12 +175,18 @@ def scan_media_from_bytes_with_config(
156
175
response .raise_for_status ()
157
176
return ScannedMedia .from_dict (response .json ())
158
177
159
- def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
178
+ def scan_media_from_url_with_config (
179
+ self ,
180
+ config : ScanMediaFromUrl ,
181
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
182
+ ) -> ScannedMedia :
160
183
"""Given the absolute url that hosts the media we wish to scan,
161
184
scan the contents of that url for matches against known harmful content.
162
185
163
186
Args:
164
187
config: The context that will be used to build the request.
188
+ timeout:
189
+ If provided, will set a timeout configuration for the underlying http client.
165
190
166
191
Returns:
167
192
ScannedMedia: A record of a successful scan of the media.
@@ -177,6 +202,7 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
177
202
url = _url ,
178
203
headers = {"Content-Type" : "application/json" },
179
204
json = config .to_dict (),
205
+ timeout = timeout ,
180
206
)
181
207
182
208
if response .is_client_error or response .is_server_error :
@@ -186,11 +212,17 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
186
212
response .raise_for_status ()
187
213
return ScannedMedia .from_dict (response .json ())
188
214
189
- def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
215
+ def scan_pdq_hashes (
216
+ self ,
217
+ config : ScanMediaFromPdq ,
218
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
219
+ ) -> ScannedPDQHashes :
190
220
"""
191
221
Scan media for CSAM based on their PDQ hashes.
192
222
Args:
193
223
config: The context that will be used to build the request.
224
+ timeout:
225
+ If provided, will set a timeout configuration for the underlying http client.
194
226
195
227
Returns:
196
228
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -205,6 +237,7 @@ def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
205
237
url = _url ,
206
238
headers = {"Content-Type" : "application/json" },
207
239
json = config .to_dict (),
240
+ timeout = timeout ,
208
241
)
209
242
if response .is_client_error or response .is_server_error :
210
243
error_detail = ErrorDetail .from_dict (response .json ())
@@ -229,7 +262,7 @@ async def scan_media_from_bytes(
229
262
self ,
230
263
contents : typing .Union [bytes , io .BytesIO ],
231
264
mime_type : str ,
232
- timeout : typing .Optional [httpx .Timeout ] = None ,
265
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
233
266
) -> ScannedMedia :
234
267
"""Given the contents of some media, along with a mime type,
235
268
scan the contents for matches against known child abuse media.
@@ -238,8 +271,7 @@ async def scan_media_from_bytes(
238
271
contents: The raw bytes that represent the media.
239
272
mime_type: The mimetype of the media.
240
273
timeout:
241
- If provided, will set a timeout configuration for the underlying http client.
242
- Otherwise, will disable the timeout entirely.
274
+ If provided, will set a timeout configuration for the underlying http client.
243
275
244
276
Returns:
245
277
The record of a successful media scan.
@@ -251,12 +283,18 @@ async def scan_media_from_bytes(
251
283
252
284
return await self .scan_media_from_bytes_with_config (ScanMediaFromBytes (contents = contents , mime_type = mime_type ), timeout = timeout )
253
285
254
- async def scan_media_from_url (self , url : str ) -> ScannedMedia :
286
+ async def scan_media_from_url (
287
+ self ,
288
+ url : str ,
289
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
290
+ ) -> ScannedMedia :
255
291
"""Given the absolute url that hosts the media we wish to scan,
256
292
scan the contents of that url for matches against known harmful content.
257
293
258
294
Args:
259
295
url: The absolute URL to scan.
296
+ timeout:
297
+ If provided, will set a timeout configuration for the underlying http client.
260
298
261
299
Returns:
262
300
The record of a successful media scan.
@@ -265,13 +303,13 @@ async def scan_media_from_url(self, url: str) -> ScannedMedia:
265
303
`ArachnidShieldError` on a failed but complete interaction with
266
304
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
267
305
"""
268
- return await self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ))
306
+ return await self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ), timeout = timeout )
269
307
270
308
async def scan_media_from_file (
271
309
self ,
272
310
filepath : pathlib .Path ,
273
311
mime_type_override : typing .Optional [str ] = None ,
274
- timeout : typing .Optional [httpx .Timeout ] = None ,
312
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
275
313
) -> ScannedMedia :
276
314
"""Given path to the media file to scan, and an optional
277
315
value for mime_type that bypasses guessing it based on the filepath,
@@ -284,8 +322,7 @@ async def scan_media_from_file(
284
322
If provided, will use this as the mime_type
285
323
instead of guessing it from the filepath.
286
324
timeout:
287
- If provided, will set a timeout configuration for the underlying http client.
288
- Otherwise, will disable the timeout entirely.
325
+ If provided, will set a timeout configuration for the underlying http client.
289
326
290
327
Returns:
291
328
The record of a successful media scan.
@@ -318,16 +355,15 @@ async def scan_media_from_file(
318
355
async def scan_media_from_bytes_with_config (
319
356
self ,
320
357
config : ScanMediaFromBytes ,
321
- timeout : typing .Optional [httpx .Timeout ] = httpx . Timeout ( 5 )
358
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
322
359
) -> ScannedMedia :
323
360
"""Given the contents of some media, along with a mime type,
324
361
scan the contents for matches against known child abuse media.
325
362
326
363
Args:
327
364
config: The context that will be used to build the request.
328
365
timeout:
329
- If provided explicitly, a configuration passed to the underlying http client.
330
- It defaults to 5 seconds, and can be disabled by setting it to `None`.
366
+ If provided, will set a timeout configuration for the underlying http client.
331
367
332
368
Returns:
333
369
ScannedMedia: A record of a successful scan of the media.
@@ -353,12 +389,18 @@ async def scan_media_from_bytes_with_config(
353
389
response .raise_for_status ()
354
390
return ScannedMedia .from_dict (response .json ())
355
391
356
- async def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
392
+ async def scan_media_from_url_with_config (
393
+ self ,
394
+ config : ScanMediaFromUrl ,
395
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
396
+ ) -> ScannedMedia :
357
397
"""Given the absolute url that hosts the media we wish to scan,
358
398
scan the contents of that url for matches against known harmful content.
359
399
360
400
Args:
361
401
config: The context that will be used to build the request.
402
+ timeout:
403
+ If provided, will set a timeout configuration for the underlying http client.
362
404
363
405
Returns:
364
406
ScannedMedia: A record of a successful scan of the media.
@@ -374,6 +416,7 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
374
416
url = _url ,
375
417
headers = {"Content-Type" : "application/json" },
376
418
json = config .to_dict (),
419
+ timeout = timeout ,
377
420
)
378
421
379
422
if response .is_client_error or response .is_server_error :
@@ -383,11 +426,17 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
383
426
response .raise_for_status ()
384
427
return ScannedMedia .from_dict (response .json ())
385
428
386
- async def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
429
+ async def scan_pdq_hashes (
430
+ self ,
431
+ config : ScanMediaFromPdq ,
432
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
433
+ ) -> ScannedPDQHashes :
387
434
"""
388
435
Scan media for CSAM based on their PDQ hashes.
389
436
Args:
390
437
config: The context that will be used to build the request.
438
+ timeout:
439
+ If provided, will set a timeout configuration for the underlying http client.
391
440
392
441
Returns:
393
442
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -402,6 +451,7 @@ async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
402
451
url = _url ,
403
452
headers = {"Content-Type" : "application/json" },
404
453
json = config .to_dict (),
454
+ timeout = timeout ,
405
455
)
406
456
if response .is_client_error or response .is_server_error :
407
457
error_detail = ErrorDetail .from_dict (response .json ())
0 commit comments