Skip to content

Commit cf77c40

Browse files
authored
Merge pull request #564 from DataDog/s.obregoso/fix_sarif
Refactor to use Dependency structure
2 parents a8c6814 + 900c3ae commit cf77c40

16 files changed

+921
-532
lines changed

guarddog/cli.py

Lines changed: 33 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
""" Package Malware Scanner
1+
"""Package Malware Scanner
22
33
CLI command that scans a package version for user-specified malware flags.
44
Includes rules based on package registry metadata and source code analysis.
55
"""
66

77
from functools import reduce
8-
import json as js
98
import logging
109
import os
1110
import sys
@@ -14,12 +13,12 @@
1413

1514
import click
1615
from prettytable import PrettyTable
17-
from termcolor import colored
1816

1917
from guarddog.analyzer.metadata import get_metadata_detectors
2018
from guarddog.analyzer.sourcecode import get_sourcecode_rules
2119
from guarddog.ecosystems import ECOSYSTEM
22-
from guarddog.reporters.sarif import report_verify_sarif
20+
from guarddog.reporters.reporter_factory import ReporterFactory, ReporterType
21+
2322
from guarddog.scanners import get_package_scanner, get_project_scanner
2423
from guarddog.utils.archives import safe_extract
2524

@@ -127,7 +126,7 @@ def _get_all_rules(ecosystem: ECOSYSTEM) -> set[str]:
127126

128127
def _get_rule_param(
129128
rules: tuple[str, ...], exclude_rules: tuple[str, ...], ecosystem: ECOSYSTEM
130-
) -> Optional[set]:
129+
) -> Optional[set[str]]:
131130
"""
132131
This function should return None if no rules are provided
133132
Else a set of rules to be used for scanning
@@ -162,28 +161,20 @@ def _verify(
162161
log.error(f"Command verify is not supported for ecosystem {ecosystem}")
163162
exit(1)
164163

165-
def display_result(result: dict) -> None:
166-
identifier = (
167-
result["dependency"]
168-
if result["version"] is None
169-
else f"{result['dependency']} version {result['version']}"
170-
)
171-
if output_format is None:
172-
print_scan_results(result.get("result"), identifier)
173-
174-
if len(result.get("errors", [])) > 0:
175-
print_errors(result.get("error"), identifier)
164+
dependencies, results = scanner.scan_local(path=path, rules=rule_param)
176165

177-
results = scanner.scan_local(path, rule_param, display_result)
178-
if output_format == "json":
179-
return_value = js.dumps(results)
166+
rule_docs = list(rule_param or _get_all_rules(ecosystem=ecosystem))
180167

181-
if output_format == "sarif":
182-
sarif_rules = _get_all_rules(ecosystem)
183-
return_value = report_verify_sarif(path, list(sarif_rules), results, ecosystem)
168+
reporter = ReporterFactory.create_reporter(ReporterType.from_str(output_format))
169+
stdout, stderr = reporter.render_verify(
170+
dependency_files=dependencies,
171+
rule_names=rule_docs,
172+
scan_results=results,
173+
ecosystem=ecosystem,
174+
)
184175

185-
if output_format is not None:
186-
print(return_value)
176+
sys.stdout.write(stdout)
177+
sys.stderr.write(stderr)
187178

188179
if exit_non_zero_on_finding:
189180
exit_with_status_code([result["result"] for result in results])
@@ -231,10 +222,10 @@ def _scan(
231222
log.error(f"Error occurred while scanning target {identifier}: '{e}'\n")
232223
sys.exit(1)
233224

234-
if output_format == "json":
235-
print(js.dumps(result))
236-
else:
237-
print_scan_results(result, result["package"])
225+
reporter = ReporterFactory.create_reporter(ReporterType.from_str(output_format))
226+
stdout, stderr = reporter.render_scan(result)
227+
sys.stdout.write(stdout)
228+
sys.stderr.write(stderr)
238229

239230
if exit_non_zero_on_finding:
240231
exit_with_status_code([result])
@@ -262,6 +253,7 @@ class CliEcosystem(click.Group):
262253
Class that dynamically represents an ecosystem in click
263254
It dynamically selects the ruleset to the instantiated ecosystem
264255
"""
256+
265257
def __init__(self, ecosystem: ECOSYSTEM):
266258
super().__init__()
267259
self.name = ecosystem.name.lower()
@@ -288,7 +280,12 @@ def rule_options(fn):
288280
@scan_options
289281
@rule_options
290282
def scan_ecosystem(
291-
target, version, rules, exclude_rules, output_format, exit_non_zero_on_finding
283+
target,
284+
version,
285+
rules,
286+
exclude_rules,
287+
output_format,
288+
exit_non_zero_on_finding,
292289
):
293290
return _scan(
294291
target,
@@ -304,7 +301,9 @@ def scan_ecosystem(
304301
@common_options
305302
@verify_options
306303
@rule_options
307-
def verify_ecosystem(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
304+
def verify_ecosystem(
305+
target, rules, exclude_rules, output_format, exit_non_zero_on_finding
306+
):
308307
return _verify(
309308
target,
310309
rules,
@@ -314,7 +313,9 @@ def verify_ecosystem(target, rules, exclude_rules, output_format, exit_non_zero_
314313
self.ecosystem,
315314
)
316315

317-
@click.command("list-rules", help=f"List available rules for {self.ecosystem.name}")
316+
@click.command(
317+
"list-rules", help=f"List available rules for {self.ecosystem.name}"
318+
)
318319
def list_rules_ecosystem():
319320
return _list_rules(self.ecosystem)
320321

@@ -333,7 +334,7 @@ def list_rules_ecosystem():
333334
@verify_options
334335
@legacy_rules_options
335336
def verify(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
336-
return _verify(
337+
return _verify(
337338
target,
338339
rules,
339340
exclude_rules,
@@ -361,81 +362,6 @@ def scan(
361362
)
362363

363364

364-
# Pretty prints scan results for the console
365-
def print_scan_results(results, identifier):
366-
num_issues = results.get("issues")
367-
errors = results.get("errors", [])
368-
369-
if num_issues == 0:
370-
print(
371-
"Found "
372-
+ colored("0 potentially malicious indicators", "green", attrs=["bold"])
373-
+ " scanning "
374-
+ colored(identifier, None, attrs=["bold"])
375-
)
376-
print()
377-
else:
378-
print(
379-
"Found "
380-
+ colored(
381-
str(num_issues) + " potentially malicious indicators",
382-
"red",
383-
attrs=["bold"],
384-
)
385-
+ " in "
386-
+ colored(identifier, None, attrs=["bold"])
387-
)
388-
print()
389-
390-
findings = results.get("results", [])
391-
for finding in findings:
392-
description = findings[finding]
393-
if isinstance(description, str): # package metadata
394-
print(colored(finding, None, attrs=["bold"]) + ": " + description)
395-
print()
396-
elif isinstance(description, list): # semgrep rule result:
397-
source_code_findings = description
398-
print(
399-
colored(finding, None, attrs=["bold"])
400-
+ ": found "
401-
+ str(len(source_code_findings))
402-
+ " source code matches"
403-
)
404-
for finding in source_code_findings:
405-
print(
406-
" * "
407-
+ finding["message"]
408-
+ " at "
409-
+ finding["location"]
410-
+ "\n "
411-
+ format_code_line_for_output(finding["code"])
412-
)
413-
print()
414-
415-
if len(errors) > 0:
416-
print_errors(errors, identifier)
417-
print("\n")
418-
419-
420-
def print_errors(errors, identifier):
421-
print(
422-
colored("Some rules failed to run while scanning " + identifier + ":", "yellow")
423-
)
424-
print()
425-
for rule in errors:
426-
print(f"* {rule}: {errors[rule]}")
427-
print()
428-
429-
430-
def format_code_line_for_output(code):
431-
return " " + colored(
432-
code.strip().replace("\n", "\n ").replace("\t", " "),
433-
None,
434-
"on_red",
435-
attrs=["bold"],
436-
)
437-
438-
439365
# Given the results, exit with the appropriate status code
440366
def exit_with_status_code(results):
441367
for result in results:

guarddog/reporters/__init__.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from guarddog.scanners.scanner import DependencyFile
2+
from typing import List
3+
from guarddog.ecosystems import ECOSYSTEM
4+
5+
6+
class BaseReporter:
    """
    Base class for all reporters.

    Subclasses override `render_scan` and `render_verify`. Both methods return
    a pair of strings: the text meant for standard output followed by the text
    meant for standard error.
    """

    @staticmethod
    def render_scan(scan_results: dict) -> tuple[str, str]:
        """
        Render the results of a single scan.

        Args:
            scan_results (dict): The scan results to be reported.

        Returns:
            tuple[str, str]: The rendered report as a (stdout, stderr) pair.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError("Subclasses must implement this method.")

    @staticmethod
    def render_verify(
        dependency_files: list[DependencyFile],
        rule_names: list[str],
        scan_results: list[dict],
        ecosystem: ECOSYSTEM,
    ) -> tuple[str, str]:
        """
        Render the results of verifying a set of dependencies.

        Args:
            dependency_files (list[DependencyFile]): The dependency files that
                were scanned.
            rule_names (list[str]): Names of the rules to include in the report.
            scan_results (list[dict]): The scan results to be reported, one
                entry per scanned dependency.
            ecosystem (ECOSYSTEM): The ecosystem the dependencies belong to.

        Returns:
            tuple[str, str]: The rendered report as a (stdout, stderr) pair.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError("Subclasses must implement this method.")
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
from termcolor import colored
2+
from guarddog.reporters import BaseReporter
3+
from typing import List
4+
from guarddog.scanners.scanner import DependencyFile
5+
from guarddog.ecosystems import ECOSYSTEM
6+
7+
8+
class HumanReadableReporter(BaseReporter):
    """
    HumanReadableReporter is a class that formats scan results in a human-readable format.

    Every method returns the formatted text instead of printing it.
    `render_scan` and `render_verify` return a (stdout, stderr) pair: the
    findings first, then the rules that failed to run.
    """

    @staticmethod
    def print_errors(identifier: str, results: dict) -> str:
        """
        Format the rules that failed to run while scanning one target.

        Args:
            identifier (str): Name of the scanned target, shown in the heading.
            results (dict): Scan results; the optional "errors" entry maps a
                rule name to its error message.

        Returns:
            str: The formatted error report, or an empty string when no rule
            failed.
        """
        errors = results.get("errors", {})
        if not errors:
            return ""

        heading = colored(
            "Some rules failed to run while scanning " + identifier + ":",
            "yellow",
        )
        lines = ["", heading, ""]
        lines.extend(f"* {rule}: {message}" for rule, message in errors.items())

        return "\n".join(lines)

    @staticmethod
    def print_scan_results(identifier: str, results: dict) -> str:
        """
        Format the findings of one scan.

        Args:
            identifier (str): Name of the scanned target, shown in the summary.
            results (dict): Scan results; "issues" holds the number of findings
                and "results" maps a rule name to either a description string
                (package metadata rule) or a list of source code matches, each
                with "message", "location" and "code" entries.

        Returns:
            str: The summary line followed by one section per reported rule.
        """

        def _format_code_line_for_output(code) -> str:
            return " " + colored(
                code.strip().replace("\n", "\n ").replace("\t", " "),
                None,
                "on_red",
                attrs=["bold"],
            )

        num_issues = results.get("issues")
        bold_identifier = colored(identifier, None, attrs=["bold"])

        if num_issues == 0:
            summary = (
                "Found "
                + colored("0 potentially malicious indicators", "green", attrs=["bold"])
                + " scanning "
                + bold_identifier
            )
            return "\n".join([summary, ""])

        lines = [
            "Found "
            + colored(
                str(num_issues) + " potentially malicious indicators",
                "red",
                attrs=["bold"],
            )
            + " in "
            + bold_identifier,
            "",
        ]

        findings = results.get("results", {})
        for rule_name, description in findings.items():
            if isinstance(description, str):  # package metadata
                lines.append(
                    colored(rule_name, None, attrs=["bold"]) + ": " + description
                )
                lines.append("")
            elif isinstance(description, list):  # semgrep rule result
                lines.append(
                    colored(rule_name, None, attrs=["bold"])
                    + ": found "
                    + str(len(description))
                    + " source code matches"
                )
                # A distinct name keeps the match from shadowing the rule name.
                for match in description:
                    lines.append(
                        " * "
                        + match["message"]
                        + " at "
                        + match["location"]
                        + "\n "
                        + _format_code_line_for_output(match["code"])
                    )
                lines.append("")

        return "\n".join(lines)

    @staticmethod
    def render_scan(scan_results: dict) -> tuple[str, str]:
        """
        Report the scan results in a human-readable format.

        Args:
            scan_results (dict): The scan results to be reported; its "package"
                entry is used as the identifier of the scanned target.

        Returns:
            tuple[str, str]: The findings report and the error report.
        """
        identifier = scan_results["package"]
        return (
            HumanReadableReporter.print_scan_results(
                identifier=identifier, results=scan_results
            ),
            HumanReadableReporter.print_errors(
                identifier=identifier, results=scan_results
            ),
        )

    @staticmethod
    def render_verify(
        dependency_files: list[DependencyFile],
        rule_names: list[str],
        scan_results: list[dict],
        ecosystem: ECOSYSTEM,
    ) -> tuple[str, str]:
        """
        Report the results of a verify run in a human-readable format.

        Args:
            dependency_files (list[DependencyFile]): Not used by this reporter.
            rule_names (list[str]): Not used by this reporter.
            scan_results (list[dict]): One entry per scanned dependency, each
                with a "dependency" identifier and a "result" dict.
            ecosystem (ECOSYSTEM): Not used by this reporter.

        Returns:
            tuple[str, str]: The findings reports and the error reports, each
            joined with newlines.
        """
        finding_reports = [
            HumanReadableReporter.print_scan_results(
                identifier=s["dependency"], results=s["result"]
            )
            for s in scan_results
        ]
        error_reports = [
            HumanReadableReporter.print_errors(
                identifier=s["dependency"], results=s["result"]
            )
            for s in scan_results
        ]
        return (
            "\n".join(finding_reports),
            # Dependencies without errors yield "", which would otherwise be
            # joined into a run of blank lines on stderr.
            "\n".join(report for report in error_reports if report),
        )

0 commit comments

Comments
 (0)