Skip to content

Commit 8c86536

Browse files
committed
Do not fail with --replace-originals
These archives should not crash extraction when using --replace-originals.
Reported-by: Smascer @Smascer
Reported-by: Bryan Sutula @sutula
Reference: #31
Reference: aboutcode-org/scancode-toolkit#2723
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent 7f007db commit 8c86536

File tree

7 files changed

+34
-6
lines changed

7 files changed

+34
-6
lines changed

src/extractcode/extract.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,9 +126,17 @@ def extract(
126126
processed_events_append = processed_events.append
127127
for event in extract_events:
128128
yield event
129+
if event.warnings or event.errors:
130+
if TRACE:
131+
logger.debug(
132+
f'extract:replace_originals: {event} has errors. '
133+
'not replacing originals'
134+
)
135+
continue
129136
if replace_originals:
130137
processed_events_append(event)
131138

139+
132140
# move files around when done
133141
if replace_originals:
134142
for xevent in reversed(processed_events):
64 KB
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
download_url: https://golang.org/src/compress/gzip/testdata/issue6550.gz.base64
1.34 KB
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
download_url: https://github.com/ruby/rake/blob/v0.9.2.2/doc/rake.1.gz?raw=true

tests/test_extract.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -274,19 +274,21 @@ def test_extract_tree_recursive_replace_originals(self):
274274
check_files(test_dir, expected)
275275

276276
def test_extract_with_replace_originals_does_not_fail_with_gz_with_trailing(self):
277-
expected = (
278-
)
277+
expected = ('rake.1.gz',)
279278
test_dir = self.get_test_loc('extract/replace-originals/rake.1.gz', copy=True)
280279
result = list(extract.extract(test_dir, recurse=True, replace_originals=True))
281-
check_no_error(result)
280+
r = result[-1]
281+
assert r.errors and all(e.startswith('Not a gzipped file') for e in r.errors)
282+
assert not r.warnings
282283
check_files(test_dir, expected)
283284

284285
def test_extract_with_replace_originals_does_not_fail_with_corrupted_archive(self):
285-
expected = (
286-
)
286+
expected = ('issue6550.gz',)
287287
test_dir = self.get_test_loc('extract/replace-originals/issue6550.gz', copy=True)
288288
result = list(extract.extract(test_dir, recurse=True, replace_originals=True))
289-
check_no_error(result)
289+
r = result[-1]
290+
assert r.errors and all(e.startswith('Error') for e in r.errors)
291+
assert not r.warnings
290292
check_files(test_dir, expected)
291293

292294
def test_extract_tree_shallow_then_recursive(self):

tests/test_extractcode_cli.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,22 @@ def test_extractcode_command_can_ignore():
261261
assert sorted(expected) == sorted(file_result)
262262

263263

264+
def test_extractcode_command_does_not_crash_with_replace_originals_and_corrupted_archives():
265+
test_dir = test_env.get_test_loc('cli/replace-originals', copy=True)
266+
result = run_extract(['--replace-originals', '--verbose', test_dir] , expected_rc=1)
267+
268+
assert not os.path.exists(os.path.join(test_dir, 'rake.1.gz-extract'))
269+
assert 'rake.1.gz' in result.stdout
270+
271+
assert 'Extracting archives...' in result.stderr
272+
assert 'ERROR extracting' in result.stderr
273+
assert 'rake.1.gz' in result.stderr
274+
assert 'Not a gzipped file ' in result.stderr
275+
assert 'issue6550.gz' in result.stderr
276+
assert ' too many length or distance symbols' in result.stderr
277+
assert 'Extracting done.' in result.stderr
278+
279+
264280
@pytest.mark.skipif(on_windows, reason='FIXME: this test fails on Windows until we have support for long file names.')
265281
def test_extractcode_command_can_extract_nuget():
266282
test_dir = test_env.get_test_loc('cli/extract_nuget', copy=True)

0 commit comments

Comments
 (0)