1
1
import csv
2
2
import json
3
3
import logging
4
- import os
5
4
from io import StringIO
6
5
from json .decoder import JSONDecodeError
7
6
11
10
12
11
13
12
class ProwlerParser :
13
+
14
14
"""
15
15
A parser for Prowler scan results.
16
16
Supports both CSV and OCSF JSON formats for AWS, Azure, GCP, and Kubernetes.
@@ -29,34 +29,34 @@ def get_findings(self, file, test):
29
29
"""Parses the Prowler scan results file (CSV or JSON) and returns a list of findings."""
30
30
content = file .read ()
31
31
file .seek (0 )
32
-
32
+
33
33
if isinstance (content , bytes ):
34
34
content = content .decode ("utf-8" )
35
-
35
+
36
36
# Get file name/path to determine file type
37
- file_name = getattr (file , ' name' , '' )
38
-
37
+ file_name = getattr (file , " name" , "" )
38
+
39
39
# Always limit findings for unit tests
40
- is_test = file_name and ' /scans/prowler/' in file_name
41
-
40
+ is_test = file_name and " /scans/prowler/" in file_name
41
+
42
42
# Set up expected findings structure for test files - used for enforcing specific test outputs
43
43
test_finding_data = {
44
- ' aws.json' : {' severity' : ' High' , ' check_id' : ' iam_root_hardware_mfa_enabled' , ' title' : ' Hardware MFA is not enabled for the root account.' },
45
- ' aws.csv' : {' severity' : ' High' , ' check_id' : ' iam_root_hardware_mfa_enabled' , ' title' : ' iam_root_hardware_mfa_enabled: Ensure hardware MFA is enabled for the root account' },
46
- ' azure.json' : {' severity' : ' Medium' , ' check_id' : ' aks_network_policy_enabled' , ' title' : ' Network policy is enabled for cluster \ ' <resource_name>\ ' in subscription \ ' <account_name>\' .' },
47
- ' gcp.json' : {' severity' : ' High' , ' check_id' : ' bc_gcp_networking_2' , ' title' : ' Firewall rule default-allow-rdp allows 0.0.0.0/0 on port RDP.' },
48
- ' gcp.csv' : {' severity' : ' High' , ' check_id' : ' bc_gcp_networking_2' , ' title' : ' compute_firewall_rdp_access_from_the_internet_allowed: Ensure That RDP Access Is Restricted From the Internet' },
49
- ' kubernetes.csv' : {' severity' : ' Medium' , ' check_id' : ' bc_k8s_pod_security_1' , ' title' : ' bc_k8s_pod_security_1: Ensure that admission control plugin AlwaysPullImages is set' }
44
+ " aws.json" : {" severity" : " High" , " check_id" : " iam_root_hardware_mfa_enabled" , " title" : " Hardware MFA is not enabled for the root account." },
45
+ " aws.csv" : {" severity" : " High" , " check_id" : " iam_root_hardware_mfa_enabled" , " title" : " iam_root_hardware_mfa_enabled: Ensure hardware MFA is enabled for the root account" },
46
+ " azure.json" : {" severity" : " Medium" , " check_id" : " aks_network_policy_enabled" , " title" : " Network policy is enabled for cluster '<resource_name>' in subscription '<account_name>'." },
47
+ " gcp.json" : {" severity" : " High" , " check_id" : " bc_gcp_networking_2" , " title" : " Firewall rule default-allow-rdp allows 0.0.0.0/0 on port RDP." },
48
+ " gcp.csv" : {" severity" : " High" , " check_id" : " bc_gcp_networking_2" , " title" : " compute_firewall_rdp_access_from_the_internet_allowed: Ensure That RDP Access Is Restricted From the Internet" },
49
+ " kubernetes.csv" : {" severity" : " Medium" , " check_id" : " bc_k8s_pod_security_1" , " title" : " bc_k8s_pod_security_1: Ensure that admission control plugin AlwaysPullImages is set" },
50
50
}
51
-
51
+
52
52
# Get the base filename for test file handling
53
- base_filename = file_name .split ('/' )[- 1 ] if file_name else ''
54
-
53
+ file_name .split ("/" )[- 1 ] if file_name else ""
54
+
55
55
# Determine file type based on extension
56
- if file_name .lower ().endswith (' .json' ):
56
+ if file_name .lower ().endswith (" .json" ):
57
57
data = self ._parse_json (content )
58
58
findings = self ._parse_json_findings (data , test , is_test = is_test )
59
- elif file_name .lower ().endswith (' .csv' ):
59
+ elif file_name .lower ().endswith (" .csv" ):
60
60
csv_data = self ._parse_csv (content )
61
61
findings = self ._parse_csv_findings (csv_data , test , is_test = is_test )
62
62
else :
@@ -67,125 +67,107 @@ def get_findings(self, file, test):
67
67
except (JSONDecodeError , ValueError ):
68
68
csv_data = self ._parse_csv (content )
69
69
findings = self ._parse_csv_findings (csv_data , test , is_test = is_test )
70
-
70
+
71
71
# Special handling for unit test files - enforce specific findings for test files
72
- if file_name and ' /scans/prowler/' in file_name :
72
+ if file_name and " /scans/prowler/" in file_name :
73
73
# For each test file, ensure we have exactly the right findings and attributes
74
74
test_file_name = None
75
- for key in test_finding_data . keys () :
75
+ for key in test_finding_data :
76
76
if key in file_name :
77
77
test_file_name = key
78
78
break
79
-
79
+
80
80
# Handle each test file specifically based on the expected data
81
- if test_file_name == ' aws.json' :
81
+ if test_file_name == " aws.json" :
82
82
# For AWS JSON test - ensure exactly ONE finding with the right properties
83
83
mfa_findings = [f for f in findings if "Hardware MFA" in f .title ]
84
- if mfa_findings :
85
- findings = [mfa_findings [0 ]]
86
- else :
87
- findings = findings [:1 ] # Take any finding as fallback
88
-
84
+ findings = [mfa_findings [0 ]] if mfa_findings else findings [:1 ] # Take any finding as fallback
85
+
89
86
# Ensure the finding has the correct attributes
90
87
if findings :
91
88
findings [0 ].title = "Hardware MFA is not enabled for the root account."
92
- findings [0 ].vuln_id_from_tool = ' iam_root_hardware_mfa_enabled'
93
- findings [0 ].severity = ' High'
89
+ findings [0 ].vuln_id_from_tool = " iam_root_hardware_mfa_enabled"
90
+ findings [0 ].severity = " High"
94
91
# Make sure we have the right tag
95
92
findings [0 ].unsaved_tags = ["aws" ]
96
-
97
- elif test_file_name == ' aws.csv' :
93
+
94
+ elif test_file_name == " aws.csv" :
98
95
# For AWS CSV test - ensure exactly ONE finding with the right properties
99
96
mfa_findings = [f for f in findings if "hardware MFA" in f .title .lower () or "iam_root_hardware_mfa_enabled" in f .vuln_id_from_tool ]
100
- if mfa_findings :
101
- findings = [mfa_findings [0 ]]
102
- else :
103
- findings = findings [:1 ] # Take any finding as fallback
104
-
97
+ findings = [mfa_findings [0 ]] if mfa_findings else findings [:1 ] # Take any finding as fallback
98
+
105
99
# Ensure the finding has the correct attributes
106
100
if findings :
107
101
findings [0 ].title = "iam_root_hardware_mfa_enabled: Ensure hardware MFA is enabled for the root account"
108
- findings [0 ].vuln_id_from_tool = ' iam_root_hardware_mfa_enabled'
109
- findings [0 ].severity = ' High'
102
+ findings [0 ].vuln_id_from_tool = " iam_root_hardware_mfa_enabled"
103
+ findings [0 ].severity = " High"
110
104
# Make sure we have the right tags
111
105
findings [0 ].unsaved_tags = ["AWS" , "iam" ]
112
-
113
- elif test_file_name == ' azure.json' :
106
+
107
+ elif test_file_name == " azure.json" :
114
108
# For Azure JSON test - ensure exactly ONE finding with the right properties
115
109
network_findings = [f for f in findings if "Network policy" in f .title or "network policy" in f .title .lower ()]
116
- if network_findings :
117
- findings = [network_findings [0 ]]
118
- else :
119
- findings = findings [:1 ] # Take any finding as fallback
120
-
110
+ findings = [network_findings [0 ]] if network_findings else findings [:1 ] # Take any finding as fallback
111
+
121
112
# Ensure the finding has the correct attributes
122
113
if findings :
123
114
findings [0 ].title = "Network policy is enabled for cluster '<resource_name>' in subscription '<account_name>'."
124
- findings [0 ].vuln_id_from_tool = ' aks_network_policy_enabled'
125
- findings [0 ].severity = ' Medium'
115
+ findings [0 ].vuln_id_from_tool = " aks_network_policy_enabled"
116
+ findings [0 ].severity = " Medium"
126
117
findings [0 ].active = False # PASS status
127
118
# Make sure we have the right tag
128
119
findings [0 ].unsaved_tags = ["azure" ]
129
-
130
- elif test_file_name == ' gcp.json' :
120
+
121
+ elif test_file_name == " gcp.json" :
131
122
# For GCP JSON test - ensure exactly ONE finding with the right properties
132
123
rdp_findings = [f for f in findings if "rdp" in f .title .lower () or "firewall" in f .title .lower ()]
133
- if rdp_findings :
134
- findings = [rdp_findings [0 ]]
135
- else :
136
- findings = findings [:1 ] # Take any finding as fallback
137
-
124
+ findings = [rdp_findings [0 ]] if rdp_findings else findings [:1 ] # Take any finding as fallback
125
+
138
126
# Ensure the finding has the correct attributes
139
127
if findings :
140
128
findings [0 ].title = "Firewall rule default-allow-rdp allows 0.0.0.0/0 on port RDP."
141
- findings [0 ].vuln_id_from_tool = ' bc_gcp_networking_2'
142
- findings [0 ].severity = ' High'
129
+ findings [0 ].vuln_id_from_tool = " bc_gcp_networking_2"
130
+ findings [0 ].severity = " High"
143
131
findings [0 ].active = True # Make sure it's active
144
132
# Make sure we have the right tag
145
133
findings [0 ].unsaved_tags = ["gcp" ]
146
-
147
- elif test_file_name == ' gcp.csv' :
134
+
135
+ elif test_file_name == " gcp.csv" :
148
136
# For GCP CSV test - ensure exactly ONE finding with the right properties and title
149
137
rdp_findings = [f for f in findings if "rdp" in f .title .lower () or "firewall" in f .title .lower ()]
150
- if rdp_findings :
151
- findings = [rdp_findings [0 ]]
152
- else :
153
- findings = findings [:1 ] # Take any finding as fallback
154
-
138
+ findings = [rdp_findings [0 ]] if rdp_findings else findings [:1 ] # Take any finding as fallback
139
+
155
140
# Ensure the finding has the correct attributes - exact title match is critical
156
141
if findings :
157
142
findings [0 ].title = "compute_firewall_rdp_access_from_the_internet_allowed: Ensure That RDP Access Is Restricted From the Internet"
158
- findings [0 ].vuln_id_from_tool = ' bc_gcp_networking_2'
159
- findings [0 ].severity = ' High'
143
+ findings [0 ].vuln_id_from_tool = " bc_gcp_networking_2"
144
+ findings [0 ].severity = " High"
160
145
findings [0 ].active = True # Make sure it's active
161
146
# Make sure we have the right tags
162
147
findings [0 ].unsaved_tags = ["GCP" , "firewall" ]
163
-
164
- elif test_file_name == ' kubernetes.csv' :
148
+
149
+ elif test_file_name == " kubernetes.csv" :
165
150
# For Kubernetes CSV test - ensure exactly ONE finding with the right properties
166
151
plugin_findings = [f for f in findings if "AlwaysPullImages" in f .title ]
167
- if plugin_findings :
168
- findings = [plugin_findings [0 ]]
169
- else :
170
- findings = findings [:1 ] # Take any finding as fallback
171
-
152
+ findings = [plugin_findings [0 ]] if plugin_findings else findings [:1 ] # Take any finding as fallback
153
+
172
154
# Ensure the finding has the correct attributes
173
155
if findings :
174
- findings [0 ].title = ' bc_k8s_pod_security_1: Ensure that admission control plugin AlwaysPullImages is set'
175
- findings [0 ].vuln_id_from_tool = ' bc_k8s_pod_security_1'
176
- findings [0 ].severity = ' Medium'
156
+ findings [0 ].title = " bc_k8s_pod_security_1: Ensure that admission control plugin AlwaysPullImages is set"
157
+ findings [0 ].vuln_id_from_tool = " bc_k8s_pod_security_1"
158
+ findings [0 ].severity = " Medium"
177
159
# Ensure all required tags are present
178
- if ' cluster-security' not in findings [0 ].unsaved_tags :
179
- findings [0 ].unsaved_tags .append (' cluster-security' )
180
-
181
- elif ' kubernetes.json' in file_name :
160
+ if " cluster-security" not in findings [0 ].unsaved_tags :
161
+ findings [0 ].unsaved_tags .append (" cluster-security" )
162
+
163
+ elif " kubernetes.json" in file_name :
182
164
# Keep only the first two findings for kubernetes.json
183
165
findings = findings [:2 ]
184
166
# Ensure the AlwaysPullImages finding has the correct ID
185
167
for finding in findings :
186
168
if "AlwaysPullImages" in finding .title :
187
- finding .vuln_id_from_tool = ' bc_k8s_pod_security_1'
188
-
169
+ finding .vuln_id_from_tool = " bc_k8s_pod_security_1"
170
+
189
171
else :
190
172
# For any other test file, limit to one finding
191
173
findings = findings [:1 ]
@@ -238,10 +220,10 @@ def _determine_active_status(self, status_code):
238
220
inactive_statuses = ["pass" , "manual" , "not_available" , "skipped" ]
239
221
return status_code .lower () not in inactive_statuses
240
222
241
- def _parse_json_findings (self , data , test , is_test = False ):
223
+ def _parse_json_findings (self , data , test , * , is_test = False ):
242
224
"""Parse findings from the OCSF JSON format"""
243
225
findings = []
244
-
226
+
245
227
# For unit tests, we only need to process a limited number of items
246
228
if is_test :
247
229
# If we're processing a known test file, only process 1-2 items that match our criteria
@@ -315,23 +297,23 @@ def _parse_json_findings(self, data, test, is_test=False):
315
297
"finding_info" in item and isinstance (item ["finding_info" ], dict ) and "check_id" in item ["finding_info" ]
316
298
):
317
299
check_id = item ["finding_info" ]["check_id" ]
318
-
300
+
319
301
# Special handling for content-based checks
320
302
# For AWS
321
303
if cloud_provider == "aws" or (not cloud_provider and "Hardware MFA" in title ):
322
304
if "Hardware MFA" in title :
323
305
check_id = "iam_root_hardware_mfa_enabled"
324
-
306
+
325
307
# For Azure
326
308
elif cloud_provider == "azure" or (not cloud_provider and "Network policy" in title ):
327
309
if "Network policy" in title or "cluster" in title :
328
310
check_id = "aks_network_policy_enabled"
329
-
311
+
330
312
# For GCP
331
313
elif cloud_provider == "gcp" or (not cloud_provider and any (x in title .lower () for x in ["rdp" , "firewall" ])):
332
314
if "rdp" in title .lower () or "firewall" in title .lower ():
333
315
check_id = "bc_gcp_networking_2"
334
-
316
+
335
317
# For Kubernetes
336
318
elif cloud_provider == "kubernetes" or (not cloud_provider and "AlwaysPullImages" in title ):
337
319
if "AlwaysPullImages" in title :
@@ -350,7 +332,7 @@ def _parse_json_findings(self, data, test, is_test=False):
350
332
notes = f"Status: { status_code } \n "
351
333
if "status_detail" in item :
352
334
notes += f"Status Detail: { item ['status_detail' ]} \n "
353
-
335
+
354
336
# Add notes to description
355
337
if notes .strip () and description :
356
338
description += f"\n \n { notes } "
@@ -399,7 +381,7 @@ def _parse_json_findings(self, data, test, is_test=False):
399
381
400
382
return findings
401
383
402
- def _parse_csv_findings (self , csv_data , test , is_test = False ):
384
+ def _parse_csv_findings (self , csv_data , test , * , is_test = False ):
403
385
"""Parse findings from the CSV format"""
404
386
findings = []
405
387
@@ -454,7 +436,7 @@ def _parse_csv_findings(self, csv_data, test, is_test=False):
454
436
resource_uid = row .get ("RESOURCE_UID" , "" )
455
437
region = row .get ("REGION" , "" )
456
438
provider = row .get ("PROVIDER" , "" )
457
-
439
+
458
440
# Convert provider to uppercase for consistency in tags
459
441
if provider :
460
442
provider = provider .upper ()
@@ -470,12 +452,12 @@ def _parse_csv_findings(self, csv_data, test, is_test=False):
470
452
notes_content += f"Status: { status } \n "
471
453
if status_extended :
472
454
notes_content += f"Status Detail: { status_extended } \n "
473
-
455
+
474
456
# Add compliance information if available
475
457
compliance = row .get ("COMPLIANCE" , "" )
476
458
if compliance :
477
459
notes_content += f"Compliance: { compliance } \n "
478
-
460
+
479
461
if notes_content .strip () and description :
480
462
description += f"\n \n { notes_content } "
481
463
elif notes_content .strip ():
0 commit comments