Skip to content

Commit 2389d3c

Browse files
authored
Enhance the logging and reporting of input fetch exceptions #1257 (#1269)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 98f0f1e commit 2389d3c

File tree

4 files changed

+67
-16
lines changed

4 files changed

+67
-16
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ v34.6.2 (unreleased)
1111
- Add support for fetching Git repository as Project input.
1212
https://github.com/nexB/scancode.io/issues/921
1313

14+
- Enhance the logging and reporting of input fetch exceptions.
15+
https://github.com/nexB/scancode.io/issues/1257
16+
1417
v34.6.1 (2024-06-07)
1518
--------------------
1619

scanpipe/pipelines/__init__.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,19 @@
4040
logger = logging.getLogger(__name__)
4141

4242

43-
class InputFileError(Exception):
43+
class InputFilesError(Exception):
    """
    InputFile is missing or cannot be downloaded.

    Aggregate every input failure in a single exception.
    ``error_tracebacks`` is an iterable of ``(error, traceback)`` pairs, each
    rendered as a numbered entry in the exception message.
    """

    def __init__(self, error_tracebacks):
        self.error_tracebacks = error_tracebacks
        super().__init__(self._generate_message())

    def __reduce__(self):
        """
        Rebuild the exception from ``error_tracebacks`` when pickled or copied.

        The default exception reduction re-invokes ``__init__`` with ``args``,
        which holds the already formatted message string, not the list of
        pairs. Iterating that string would fail on tuple unpacking.
        """
        return type(self), (self.error_tracebacks,)

    def _generate_message(self):
        """Return the multi-line message listing each error and its traceback."""
        header = "InputFilesError encountered with the following issues:\n"
        entries = (
            f"\nError {index}: {error}\n\n{tb}"
            for index, (error, tb) in enumerate(self.error_tracebacks, start=1)
        )
        return header + "".join(entries)
55+
4656

4757
def group(*groups):
4858
"""Mark a function as part of a particular group."""
@@ -218,9 +228,9 @@ def execute(self):
218228
def download_missing_inputs(self):
219229
"""
220230
Download any InputSource missing on disk.
221-
Raise an error if any of the uploaded files is not available.
231+
Raise an error if any of the uploaded files is not available or not reachable.
222232
"""
223-
errors = []
233+
error_tracebacks = []
224234

225235
for input_source in self.project.inputsources.all():
226236
if input_source.exists():
@@ -229,18 +239,20 @@ def download_missing_inputs(self):
229239
if input_source.is_uploaded:
230240
msg = f"Uploaded file {input_source} not available."
231241
self.log(msg)
232-
errors.append(msg)
242+
error_tracebacks.append((msg, "No traceback available."))
233243
continue
234244

235245
self.log(f"Fetching input from {input_source.download_url}")
236246
try:
237247
input_source.fetch()
238248
except Exception as error:
249+
traceback_str = traceback.format_exc()
250+
logger.error(traceback_str)
239251
self.log(f"{input_source.download_url} could not be fetched.")
240-
errors.append(error)
252+
error_tracebacks.append((str(error), traceback_str))
241253

242-
if errors:
243-
raise InputFileError(errors)
254+
if error_tracebacks:
255+
raise InputFilesError(error_tracebacks)
244256

245257
def add_error(self, exception, resource=None):
246258
"""Create a ``ProjectMessage`` ERROR record on the current `project`."""

scanpipe/pipes/fetch.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,22 @@ def run_command_safely(command_args):
6363
sure to sanitize and validate the input to prevent any malicious commands from
6464
being executed.
6565
66-
As ``check`` is True, if the exit code is non-zero, it raises a CalledProcessError.
66+
Raise a SubprocessError if the exit code was non-zero.
6767
"""
68-
result = subprocess.run( # nosec
68+
completed_process = subprocess.run( # nosec
6969
command_args,
7070
capture_output=True,
7171
text=True,
72-
check=True,
7372
)
74-
return result.stdout
73+
74+
if completed_process.returncode:
75+
error_msg = (
76+
f'Error while executing cmd="{completed_process.args}": '
77+
f'"{completed_process.stderr.strip()}"'
78+
)
79+
raise subprocess.SubprocessError(error_msg)
80+
81+
return completed_process.stdout
7582

7683

7784
def get_request_session(uri):

scanpipe/tests/test_pipelines.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from scanpipe.models import CodebaseResource
4040
from scanpipe.models import DiscoveredPackage
4141
from scanpipe.models import Project
42-
from scanpipe.pipelines import InputFileError
42+
from scanpipe.pipelines import InputFilesError
4343
from scanpipe.pipelines import Pipeline
4444
from scanpipe.pipelines import is_pipeline
4545
from scanpipe.pipelines import root_filesystem
@@ -166,11 +166,17 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get):
166166
filename=file_location.name, is_uploaded=True
167167
)
168168
self.assertFalse(input_source.exists())
169-
with self.assertRaises(InputFileError) as error:
169+
with self.assertRaises(InputFilesError) as error:
170170
pipeline.download_missing_inputs()
171-
error_msg = "Uploaded file filename=notice.NOTICE [uploaded] not available."
172-
self.assertEqual(f"['{error_msg}']", str(error.exception))
173-
self.assertIn(error_msg, run.log)
171+
error_msg = (
172+
"InputFilesError encountered with the following issues:\n\n"
173+
"Error 1: Uploaded file filename=notice.NOTICE [uploaded] not available."
174+
"\n\nNo traceback available."
175+
)
176+
self.assertEqual(error_msg, str(error.exception))
177+
self.assertIn(
178+
"Uploaded file filename=notice.NOTICE [uploaded] not available.", run.log
179+
)
174180

175181
project1.copy_input_from(file_location)
176182
self.assertTrue(input_source.exists())
@@ -191,6 +197,29 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get):
191197
self.assertTrue(input_source2.exists())
192198
mock_get.assert_called_once()
193199

200+
@mock.patch("scanpipe.models.InputSource.fetch")
def test_scanpipe_pipeline_class_download_fetch_exception(self, mock_fetch):
    """Check that a fetch failure surfaces in InputFilesError and the run log."""
    # Every fetch attempt fails with the same generic exception.
    mock_fetch.side_effect = Exception("File not found")

    project = Project.objects.create(name="Analysis")
    pipeline_run = project.add_pipeline("do_nothing")
    pipeline = pipeline_run.make_pipeline_instance()
    project.add_input_source(download_url="https://download.url/file.zip")

    with self.assertRaises(InputFilesError) as raised:
        pipeline.download_missing_inputs()

    # The aggregated message carries the header, the numbered error, and the
    # formatted traceback of the original exception.
    reported = str(raised.exception)
    expected_fragments = (
        "InputFilesError encountered with the following issues:",
        "Error 1: File not found",
        "Traceback (most recent call last):",
        "Exception: File not found",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, reported)

    # Both the attempt and the failure are recorded in the run log.
    expected_log_entries = (
        "Fetching input from https://download.url/file.zip",
        "https://download.url/file.zip could not be fetched.",
    )
    for entry in expected_log_entries:
        self.assertIn(entry, pipeline_run.log)
222+
194223
@mock.patch("git.repo.base.Repo.clone_from")
195224
def test_scanpipe_pipeline_class_download_missing_inputs_git_repo(self, mock_clone):
196225
project1 = Project.objects.create(name="Analysis")

0 commit comments

Comments
 (0)