Skip to content

Commit f62fde4

Browse files
authored
Merge pull request #2512 from kamil-certat/http_collector_dirs
FIX: Support for extracting data from archives with dirs
2 parents 6aa1147 + 489a1be commit f62fde4

File tree

8 files changed

+44
-2
lines changed

8 files changed

+44
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
### Core
1515
- `intelmq.lib.utils.drop_privileges`: When IntelMQ is called as `root` and dropping the privileges to user `intelmq`, also set the non-primary groups associated with the `intelmq` user. Makes the behaviour of running intelmqctl as `root` closer to the behaviour of `sudo -u intelmq ...` (PR#2507 by Mikk Margus Möll).
16+
- `intelmq.lib.utils.unzip`: Ignore directories themselves when extracting data to prevent the extraction of empty data for directory entries (PR#2512 by Kamil Mankowski).
1617

1718
### Development
1819

intelmq/lib/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,7 @@ def extract_tar(file):
538538
def extract(filename):
539539
return tar.extractfile(filename).read()
540540

541-
return tuple(file.name for file in tar.getmembers()), tar, extract
541+
return tuple(file.name for file in tar.getmembers() if file.isfile()), tar, extract
542542

543543

544544
def extract_gzip(file):
@@ -547,7 +547,7 @@ def extract_gzip(file):
547547

548548
def extract_zip(file):
549549
zfp = zipfile.ZipFile(io.BytesIO(file), "r")
550-
return zfp.namelist(), zfp, zfp.read
550+
return [member.filename for member in zfp.infolist() if not member.is_dir()], zfp, zfp.read
551551

552552

553553
def unzip(file: bytes, extract_files: Union[bool, list], logger=None,

intelmq/tests/assets/subdir.tar.gz

183 Bytes
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
SPDX-FileCopyrightText: 2024 CERT.at GmbH
2+
3+
SPDX-License-Identifier: AGPL-3.0-or-later

intelmq/tests/assets/subdir.zip

430 Bytes
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
SPDX-FileCopyrightText: 2024 CERT.at GmbH
2+
3+
SPDX-License-Identifier: AGPL-3.0-or-later

intelmq/tests/bots/collectors/http/test_collector.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,25 @@ def test_zip(self, mocker):
143143
self.assertMessageEqual(0, output0)
144144
self.assertMessageEqual(1, output1)
145145

146+
def test_zip_subdirs(self, mocker):
147+
"""
148+
Test unzipping when the zip has subdirectories
149+
"""
150+
prepare_mocker(mocker)
151+
self.run_bot(parameters={'http_url': 'http://localhost/subdir.zip',
152+
'name': 'Example feed',
153+
},
154+
iterations=1)
155+
156+
output0 = OUTPUT[0].copy()
157+
output0['feed.url'] = 'http://localhost/subdir.zip'
158+
output0['extra.file_name'] = 'subdir/bar'
159+
output1 = OUTPUT[1].copy()
160+
output1['feed.url'] = 'http://localhost/subdir.zip'
161+
output1['extra.file_name'] = 'subdir/foo'
162+
self.assertMessageEqual(0, output0)
163+
self.assertMessageEqual(1, output1)
164+
146165
@test.skip_exotic()
147166
def test_pgp(self, mocker):
148167
"""

intelmq/tests/lib/test_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,14 @@ def test_unzip_tar_gz_return_names(self):
260260
self.assertEqual(tuple(result), (('bar', b'bar text\n'),
261261
('foo', b'foo text\n')))
262262

263+
def test_unzip_tar_gz_with_subdir(self):
264+
""" Test the unzip function with a tar gz file containing a subdirectory and return_names. Test that the directories themselves are ignored. """
265+
filename = os.path.join(os.path.dirname(__file__), '../assets/subdir.tar.gz')
266+
with open(filename, 'rb') as fh:
267+
result = utils.unzip(fh.read(), extract_files=True, return_names=True)
268+
self.assertEqual(tuple(result), (('subdir/foo', b'foo text\n'),
269+
('subdir/bar', b'bar text\n')))
270+
263271
def test_unzip_gz(self):
264272
""" Test the unzip function with a gz file. """
265273
filename = os.path.join(os.path.dirname(__file__), '../assets/foobar.gz')
@@ -289,6 +297,14 @@ def test_unzip_zip_return_names(self):
289297
self.assertEqual(tuple(result), (('bar', b'bar text\n'),
290298
('foo', b'foo text\n')))
291299

300+
def test_unzip_zip_with_subdir(self):
301+
""" Test the unzip function with a zip containing a subdirectory and returning names. Test that the directories themselves are ignored."""
302+
filename = os.path.join(os.path.dirname(__file__), '../assets/subdir.zip')
303+
with open(filename, 'rb') as fh:
304+
result = utils.unzip(fh.read(), extract_files=True, return_names=True)
305+
self.assertEqual(tuple(result), (('subdir/bar', b'bar text\n'),
306+
('subdir/foo', b'foo text\n')))
307+
292308
def test_file_name_from_response(self):
293309
""" test file_name_from_response """
294310
response = requests.Response()

0 commit comments

Comments
 (0)