18
18
)
19
19
20
20
21
# Timeout profile for requests that send a request body.
TIMEOUT_WRITE_PERMISSIVE = httpx.Timeout(
    timeout=60,  # Fallback for every phase that is not overridden below.
    # Sending a large chunk may take an unbounded amount of time,
    # so the write phase is given no limit at all.
    write=None,
    connect=3,
)
28
+
29
# Timeout profile for requests that mostly wait on the server's response.
TIMEOUT_READ_PERMISSIVE = httpx.Timeout(
    timeout=60,  # Fallback for every phase that is not overridden below.
    # Leave the server enough room to process the request and for the
    # response to be read back.
    read=60,
    connect=3,
)
35
+
36
+
21
37
class ArachnidShield (_ArachnidShield ):
22
38
"""A client to communicate with the Arachnid Shield API
23
39
provided by the Canadian Centre for Child Protection.
@@ -30,13 +46,20 @@ def __init__(self, username: typing.Union[str, bytes], password: typing.Union[st
30
46
super ().__init__ (username = username , password = password )
31
47
self .__client = super ()._build_sync_http_client ()
32
48
33
- def scan_media_from_bytes (self , contents : typing .Union [bytes , io .BytesIO ], mime_type : str ) -> ScannedMedia :
49
+ def scan_media_from_bytes (
50
+ self ,
51
+ contents : typing .Union [bytes , io .BytesIO ],
52
+ mime_type : str ,
53
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
54
+ ) -> ScannedMedia :
34
55
"""Given the contents of some media, along with a mime type,
35
56
scan the contents for matches against known child abuse media.
36
57
37
58
Args:
38
59
contents: The raw bytes that represent the media.
39
60
mime_type: The mimetype of the media.
61
+ timeout:
62
+ If provided, will set a timeout configuration for the underlying http client.
40
63
41
64
Returns:
42
65
The record of a successful media scan.
@@ -45,10 +68,13 @@ def scan_media_from_bytes(self, contents: typing.Union[bytes, io.BytesIO], mime_
45
68
`ArachnidShieldError` on a failed but complete interaction with
46
69
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
47
70
"""
48
- return self .scan_media_from_bytes_with_config (ScanMediaFromBytes (contents = contents , mime_type = mime_type ))
71
+ return self .scan_media_from_bytes_with_config (ScanMediaFromBytes (contents = contents , mime_type = mime_type ), timeout = timeout )
49
72
50
73
def scan_media_from_file (
51
- self , filepath : pathlib .Path , mime_type_override : typing .Optional [str ] = None
74
+ self ,
75
+ filepath : pathlib .Path ,
76
+ mime_type_override : typing .Optional [str ] = None ,
77
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
52
78
) -> ScannedMedia :
53
79
"""Given path to the media file to scan, and an optional
54
80
value for mime_type that bypasses guessing it based of the filepath,
@@ -60,6 +86,8 @@ def scan_media_from_file(
60
86
mime_type_override:
61
87
If provided, will use this as the mime_type
62
88
instead of guessing it from the filepath.
89
+ timeout:
90
+ If provided, will set a timeout configuration for the underlying http client.
63
91
64
92
Returns:
65
93
The record of a successful media scan.
@@ -78,7 +106,7 @@ def scan_media_from_file(
78
106
detail = (
79
107
f"Failed to identify mime_type for { filepath } . "
80
108
f"You may specify it explicitly by providing "
81
- f"`force_mime_type `."
109
+ f"`mime_type_override `."
82
110
)
83
111
)
84
112
)
@@ -87,14 +115,20 @@ def scan_media_from_file(
87
115
contents = f .read ()
88
116
89
117
config = ScanMediaFromBytes (contents = contents , mime_type = mime_type )
90
- return self .scan_media_from_bytes_with_config (config )
118
+ return self .scan_media_from_bytes_with_config (config , timeout = timeout )
91
119
92
- def scan_media_from_url (self , url : str ) -> ScannedMedia :
120
+ def scan_media_from_url (
121
+ self ,
122
+ url : str ,
123
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
124
+ ) -> ScannedMedia :
93
125
"""Given the absolute url that hosts the media we wish to scan,
94
126
scan the contents of that url for matches against known harmful content.
95
127
96
128
Args:
97
129
url: The absolute URL to scan.
130
+ timeout:
131
+ If provided, will set a timeout configuration for the underlying http client.
98
132
99
133
Returns:
100
134
The record of a successful media scan.
@@ -103,14 +137,20 @@ def scan_media_from_url(self, url: str) -> ScannedMedia:
103
137
`ArachnidShieldError` on a failed but complete interaction with
104
138
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
105
139
"""
106
- return self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ))
140
+ return self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ), timeout = timeout )
107
141
108
- def scan_media_from_bytes_with_config (self , config : ScanMediaFromBytes ) -> ScannedMedia :
142
+ def scan_media_from_bytes_with_config (
143
+ self ,
144
+ config : ScanMediaFromBytes ,
145
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
146
+ ) -> ScannedMedia :
109
147
"""Given the contents of some media, along with a mime type,
110
148
scan the contents for matches against known child abuse media.
111
149
112
150
Args:
113
151
config: The context that will be used to build the request.
152
+ timeout:
153
+ If provided, will set a timeout configuration for the underlying http client.
114
154
115
155
Returns:
116
156
ScannedMedia: A record of a successful scan of the media.
@@ -125,6 +165,7 @@ def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> Scann
125
165
url = url ,
126
166
headers = {"Content-Type" : config .mime_type },
127
167
content = config .contents ,
168
+ timeout = timeout ,
128
169
)
129
170
130
171
if response .is_client_error or response .is_server_error :
@@ -134,12 +175,18 @@ def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> Scann
134
175
response .raise_for_status ()
135
176
return ScannedMedia .from_dict (response .json ())
136
177
137
- def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
178
+ def scan_media_from_url_with_config (
179
+ self ,
180
+ config : ScanMediaFromUrl ,
181
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
182
+ ) -> ScannedMedia :
138
183
"""Given the absolute url that hosts the media we wish to scan,
139
184
scan the contents of that url for matches against known harmful content.
140
185
141
186
Args:
142
187
config: The context that will be used to build the request.
188
+ timeout:
189
+ If provided, will set a timeout configuration for the underlying http client.
143
190
144
191
Returns:
145
192
ScannedMedia: A record of a successful scan of the media.
@@ -155,6 +202,7 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
155
202
url = _url ,
156
203
headers = {"Content-Type" : "application/json" },
157
204
json = config .to_dict (),
205
+ timeout = timeout ,
158
206
)
159
207
160
208
if response .is_client_error or response .is_server_error :
@@ -164,11 +212,17 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
164
212
response .raise_for_status ()
165
213
return ScannedMedia .from_dict (response .json ())
166
214
167
- def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
215
+ def scan_pdq_hashes (
216
+ self ,
217
+ config : ScanMediaFromPdq ,
218
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
219
+ ) -> ScannedPDQHashes :
168
220
"""
169
221
Scan medias for CSAM based on their PDQ hashes.
170
222
Args:
171
223
config: The context that will be used to build the request.
224
+ timeout:
225
+ If provided, will set a timeout configuration for the underlying http client.
172
226
173
227
Returns:
174
228
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -183,6 +237,7 @@ def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
183
237
url = _url ,
184
238
headers = {"Content-Type" : "application/json" },
185
239
json = config .to_dict (),
240
+ timeout = timeout ,
186
241
)
187
242
if response .is_client_error or response .is_server_error :
188
243
error_detail = ErrorDetail .from_dict (response .json ())
@@ -203,13 +258,20 @@ def __init__(self, username: typing.Union[str, bytes], password: typing.Union[st
203
258
super ().__init__ (username = username , password = password )
204
259
self .__client = super ()._build_async_http_client ()
205
260
206
- async def scan_media_from_bytes (self , contents : typing .Union [bytes , io .BytesIO ], mime_type : str ) -> ScannedMedia :
261
+ async def scan_media_from_bytes (
262
+ self ,
263
+ contents : typing .Union [bytes , io .BytesIO ],
264
+ mime_type : str ,
265
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
266
+ ) -> ScannedMedia :
207
267
"""Given the contents of some media, along with a mime type,
208
268
scan the contents for matches against known child abuse media.
209
269
210
270
Args:
211
271
contents: The raw bytes that represent the media.
212
272
mime_type: The mimetype of the media.
273
+ timeout:
274
+ If provided, will set a timeout configuration for the underlying http client.
213
275
214
276
Returns:
215
277
The record of a successful media scan.
@@ -219,14 +281,20 @@ async def scan_media_from_bytes(self, contents: typing.Union[bytes, io.BytesIO],
219
281
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
220
282
"""
221
283
222
- return await self .scan_media_from_bytes_with_config (ScanMediaFromBytes (contents = contents , mime_type = mime_type ))
284
+ return await self .scan_media_from_bytes_with_config (ScanMediaFromBytes (contents = contents , mime_type = mime_type ), timeout = timeout )
223
285
224
- async def scan_media_from_url (self , url : str ) -> ScannedMedia :
286
+ async def scan_media_from_url (
287
+ self ,
288
+ url : str ,
289
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
290
+ ) -> ScannedMedia :
225
291
"""Given the absolute url that hosts the media we wish to scan,
226
292
scan the contents of that url for matches against known harmful content.
227
293
228
294
Args:
229
295
url: The absolute URL to scan.
296
+ timeout:
297
+ If provided, will set a timeout configuration for the underlying http client.
230
298
231
299
Returns:
232
300
The record of a successful media scan.
@@ -235,10 +303,13 @@ async def scan_media_from_url(self, url: str) -> ScannedMedia:
235
303
`ArachnidShieldError` on a failed but complete interaction with
236
304
the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
237
305
"""
238
- return await self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ))
306
+ return await self .scan_media_from_url_with_config (ScanMediaFromUrl (url = url ), timeout = timeout )
239
307
240
308
async def scan_media_from_file (
241
- self , filepath : pathlib .Path , mime_type_override : typing .Optional [str ] = None
309
+ self ,
310
+ filepath : pathlib .Path ,
311
+ mime_type_override : typing .Optional [str ] = None ,
312
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
242
313
) -> ScannedMedia :
243
314
"""Given path to the media file to scan, and an optional
244
315
value for mime_type that bypasses guessing it based of the filepath,
@@ -250,6 +321,8 @@ async def scan_media_from_file(
250
321
mime_type_override:
251
322
If provided, will use this as the mime_type
252
323
instead of guessing it from the filepath.
324
+ timeout:
325
+ If provided, will set a timeout configuration for the underlying http client.
253
326
254
327
Returns:
255
328
The record of a successful media scan.
@@ -268,7 +341,7 @@ async def scan_media_from_file(
268
341
detail = (
269
342
f"Failed to identify mime_type for { filepath } . "
270
343
f"You may specify it explicitly by providing "
271
- f"`force_mime_type `."
344
+ f"`mime_type_override `."
272
345
)
273
346
)
274
347
)
@@ -277,14 +350,20 @@ async def scan_media_from_file(
277
350
contents = f .read ()
278
351
279
352
config = ScanMediaFromBytes (contents = contents , mime_type = mime_type )
280
- return await self .scan_media_from_bytes_with_config (config )
353
+ return await self .scan_media_from_bytes_with_config (config , timeout = timeout )
281
354
282
- async def scan_media_from_bytes_with_config (self , config : ScanMediaFromBytes ) -> ScannedMedia :
355
+ async def scan_media_from_bytes_with_config (
356
+ self ,
357
+ config : ScanMediaFromBytes ,
358
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
359
+ ) -> ScannedMedia :
283
360
"""Given the contents of some media, along with a mime type,
284
361
scan the contents for matches against known child abuse media.
285
362
286
363
Args:
287
364
config: The context that will be used to build the request.
365
+ timeout:
366
+ If provided, will set a timeout configuration for the underlying http client.
288
367
289
368
Returns:
290
369
ScannedMedia: A record of a successful scan of the media.
@@ -300,6 +379,7 @@ async def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) ->
300
379
url = url ,
301
380
headers = {"Content-Type" : config .mime_type },
302
381
content = config .contents ,
382
+ timeout = timeout ,
303
383
)
304
384
305
385
if response .is_client_error or response .is_server_error :
@@ -309,12 +389,18 @@ async def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) ->
309
389
response .raise_for_status ()
310
390
return ScannedMedia .from_dict (response .json ())
311
391
312
- async def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
392
+ async def scan_media_from_url_with_config (
393
+ self ,
394
+ config : ScanMediaFromUrl ,
395
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
396
+ ) -> ScannedMedia :
313
397
"""Given the absolute url that hosts the media we wish to scan,
314
398
scan the contents of that url for matches against known harmful content.
315
399
316
400
Args:
317
401
config: The context that will be used to build the request.
402
+ timeout:
403
+ If provided, will set a timeout configuration for the underlying http client.
318
404
319
405
Returns:
320
406
ScannedMedia: A record of a successful scan of the media.
@@ -330,6 +416,7 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
330
416
url = _url ,
331
417
headers = {"Content-Type" : "application/json" },
332
418
json = config .to_dict (),
419
+ timeout = timeout ,
333
420
)
334
421
335
422
if response .is_client_error or response .is_server_error :
@@ -339,11 +426,17 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
339
426
response .raise_for_status ()
340
427
return ScannedMedia .from_dict (response .json ())
341
428
342
- async def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
429
+ async def scan_pdq_hashes (
430
+ self ,
431
+ config : ScanMediaFromPdq ,
432
+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
433
+ ) -> ScannedPDQHashes :
343
434
"""
344
435
Scan medias for CSAM based on their PDQ hashes.
345
436
Args:
346
437
config: The context that will be used to build the request.
438
+ timeout:
439
+ If provided, will set a timeout configuration for the underlying http client.
347
440
348
441
Returns:
349
442
ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -358,6 +451,7 @@ async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
358
451
url = _url ,
359
452
headers = {"Content-Type" : "application/json" },
360
453
json = config .to_dict (),
454
+ timeout = timeout ,
361
455
)
362
456
if response .is_client_error or response .is_server_error :
363
457
error_detail = ErrorDetail .from_dict (response .json ())
0 commit comments