Skip to content

Commit 828d0ca

Browse files
committed
Refactor ProwlerParser for improved functionality and error handling
- Updated the parser documentation to clarify supported formats and title construction logic - Enhanced error logging for unsupported file formats in get_findings method - Simplified the extraction logic for check IDs in _parse_json_findings method - Improved remediation extraction logic to prefer 'text' field over 'desc' - Added date extraction from finding_info if available - Removed redundant test cases for ProwlerStringIOParser as they are no longer needed
1 parent 6c4e41b commit 828d0ca

File tree

2 files changed

+41
-323
lines changed

2 files changed

+41
-323
lines changed

dojo/tools/prowler/parser.py

Lines changed: 41 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,15 @@ class ProwlerParser:
1212

1313
"""
1414
A parser for Prowler scan results.
15-
Supports both CSV and OCSF JSON formats for AWS, Azure, GCP, and Kubernetes.
15+
Supports both CSV and OCSF JSON formats for AWS, Azure, GCP, and Kubernetes.
16+
Titles are constructed as "check_id: check_title" when both fields are present,
17+
falling back to the check ID alone, then the check title alone, and finally to
18+
the generic title "Prowler Finding" when neither is available.
1624
"""
1725

1826
def get_scan_types(self):
@@ -43,20 +51,19 @@ def get_findings(self, file, test):
4351
csv_data = self._parse_csv(content)
4452
findings = self._parse_csv_findings(csv_data, test, file_name=file_name)
4553
else:
46-
# If file type can't be determined from extension, throw an error
47-
error_message = f"Unsupported file format. Prowler parser only supports JSON and CSV files. File name: {file_name}"
48-
raise ValueError(error_message)
54+
# If file type can't be determined from extension
55+
error_msg = f"Unsupported file format. Prowler parser only supports JSON and CSV files. File name: {file_name}"
56+
logger.error(f"Unsupported file format for Prowler parser: {file_name}")
57+
raise ValueError(error_msg)
4958

5059
return findings
5160

5261
def _parse_json(self, content):
5362
"""Safely parse JSON content"""
54-
# Content is already decoded in get_findings method
5563
return json.loads(content)
5664

5765
def _parse_csv(self, content):
5866
"""Parse CSV content"""
59-
# Content is already decoded in get_findings method
6067
f = StringIO(content)
6168
csv_reader = csv.DictReader(f, delimiter=";")
6269
results = list(csv_reader)
@@ -89,7 +96,8 @@ def _determine_active_status(self, status_code):
8996
if not status_code:
9097
return True
9198

92-
inactive_statuses = ["pass", "manual", "not_available", "skipped"]
99+
# Using a set for O(1) lookup performance
100+
inactive_statuses = {"pass", "manual", "not_available", "skipped"}
93101
return status_code.lower() not in inactive_statuses
94102

95103
def _parse_json_findings(self, data, test, *, file_name=""):
@@ -98,8 +106,11 @@ def _parse_json_findings(self, data, test, *, file_name=""):
98106

99107
for item in data:
100108
# Skip items without required fields
101-
if not isinstance(item, dict) or "message" not in item:
102-
logger.debug(f"Skipping Prowler finding because it's not a dict or missing 'message' field: {item}")
109+
if not isinstance(item, dict):
110+
logger.debug(f"Skipping Prowler finding because it's not a dict: {item}")
111+
continue
112+
if "message" not in item:
113+
logger.debug(f"Skipping Prowler finding because it's missing 'message' field: {item}")
103114
continue
104115

105116
# Get basic information
@@ -157,47 +168,19 @@ def _parse_json_findings(self, data, test, *, file_name=""):
157168
if "finding_info" in item and isinstance(item["finding_info"], dict):
158169
unique_id = item["finding_info"].get("uid", "")
159170

160-
# Extract check ID from various places
171+
# Get check ID - simplify extraction logic
161172
check_id = None
162-
if "check_id" in item:
173+
if "finding_info" in item and isinstance(item["finding_info"], dict):
174+
check_id = item["finding_info"].get("check_id")
175+
# Fall back to top-level check_id if not found in finding_info
176+
if not check_id and "check_id" in item:
163177
check_id = item.get("check_id")
164-
elif (
165-
"finding_info" in item and isinstance(item["finding_info"], dict) and "check_id" in item["finding_info"]
166-
):
167-
check_id = item["finding_info"]["check_id"]
168-
169-
# Map certain titles or contents to standardized check IDs
170-
# This helps with consistency across different formats
171-
172-
# For AWS
173-
if cloud_provider == "aws" or (not cloud_provider and "Hardware MFA" in title):
174-
if "Hardware MFA" in title or "hardware_mfa" in title.lower():
175-
check_id = "iam_root_hardware_mfa_enabled"
176-
177-
# For Azure
178-
elif cloud_provider == "azure" or (not cloud_provider and "Network policy" in title):
179-
if "Network policy" in title or "network policy" in title.lower() or "cluster" in title:
180-
check_id = "aks_network_policy_enabled"
181-
182-
# For GCP
183-
elif cloud_provider == "gcp" or (
184-
not cloud_provider and any(x in title.lower() for x in ["rdp", "firewall"])
185-
):
186-
if "rdp" in title.lower() or "firewall" in title.lower():
187-
check_id = "bc_gcp_networking_2"
188-
189-
# For Kubernetes
190-
elif cloud_provider == "kubernetes" or (not cloud_provider and "AlwaysPullImages" in title):
191-
if "AlwaysPullImages" in title:
192-
check_id = "bc_k8s_pod_security_1"
193178

194179
# Get remediation information
195180
remediation = ""
196181
if "remediation" in item and isinstance(item["remediation"], dict):
197-
if "text" in item["remediation"]:
198-
remediation = item["remediation"]["text"]
199-
elif "desc" in item["remediation"]:
200-
remediation = item["remediation"]["desc"]
182+
# Try to get remediation - prefer "text" field but fall back to "desc" if needed
183+
remediation = item["remediation"].get("text", item["remediation"].get("desc", ""))
201184

202185
# Add notes to description
203186
if status_code:
@@ -227,6 +210,10 @@ def _parse_json_findings(self, data, test, *, file_name=""):
227210
# Add additional metadata
228211
finding.unsaved_tags = []
229212

213+
# Extract date if available
214+
if "finding_info" in item and isinstance(item["finding_info"], dict) and "created_time_dt" in item["finding_info"]:
215+
finding.date = item["finding_info"]["created_time_dt"]
216+
230217
# Add cloud provider as tag if available
231218
if cloud_provider:
232219
finding.unsaved_tags.append(cloud_provider)
@@ -287,7 +274,6 @@ def _parse_csv_findings(self, csv_data, test, *, file_name=""):
287274
check_id = row.get("CHECK_ID", "")
288275
check_title = row.get("CHECK_TITLE", "")
289276
provider = row.get("PROVIDER", "").lower()
290-
service_name = row.get("SERVICE_NAME", "")
291277

292278
# Original check ID before any standardization (for titles)
293279
original_check_id = check_id
@@ -306,9 +292,9 @@ def _parse_csv_findings(self, csv_data, test, *, file_name=""):
306292

307293
# Construct title
308294
if original_check_id and check_title:
309-
title = f"{original_check_id}: {check_title}"
310-
elif original_check_id:
311-
title = original_check_id
295+
title = f"{check_id}: {check_title}"
296+
elif check_id:
297+
title = check_id
312298
elif check_title:
313299
title = check_title
314300
else:
@@ -382,6 +368,13 @@ def _parse_csv_findings(self, csv_data, test, *, file_name=""):
382368

383369
# Add provider as tag if available
384370
finding.unsaved_tags = []
371+
372+
# Extract date if available
373+
if row.get("TIMESTAMP", ""):
374+
finding.date = row.get("TIMESTAMP")
375+
elif row.get("ASSESSMENT_START_TIME", ""):
376+
finding.date = row.get("ASSESSMENT_START_TIME")
377+
385378
if provider:
386379
finding.unsaved_tags.append(provider)
387380
# If no provider in the CSV but we can infer it from check_id or title

0 commit comments

Comments (0)