@@ -13,6 +13,7 @@ jupyter:
 ---

 ``` python
+
 import json
 from datetime import datetime

@@ -42,6 +43,7 @@ Key insights calculated:


 ``` python
+# Calculate total vulnerabilities for each year
 total_vulns_2023 = df_2023.shape[0]
 total_vulns_2024 = df_2024.shape[0]
 percentage_change = round(((total_vulns_2024 - total_vulns_2023) / total_vulns_2023) * 100, 3)
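A quick sanity check of the percentage-change formula above, with invented totals rather than the real 2023/2024 counts:

``` python
# Hypothetical totals for illustration only; the notebook derives
# these from df_2023.shape[0] and df_2024.shape[0].
total_2023 = 28_000
total_2024 = 33_000

# Year-over-year percentage change, rounded to three decimals as above
pct_change = round(((total_2024 - total_2023) / total_2023) * 100, 3)
print(pct_change)  # 17.857
```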
@@ -52,11 +54,10 @@ total_vulnerabilities = {
     "percentage_change": percentage_change
 }

-# 2023 Data
+# Calculate month with the most vulnerabilities for each year
 month_2023 = df_2023['Published_Month'].value_counts().idxmax()
 count_2023 = df_2023['Published_Month'].value_counts().max()

-# 2024 Data
 month_2024 = df_2024['Published_Month'].value_counts().idxmax()
 count_2024 = df_2024['Published_Month'].value_counts().max()

@@ -75,11 +76,13 @@ month_with_most_vulnerabilities = {
     }
 }

+# Generate severity distribution for each year
 severity_distribution = {
     "2023": df_2023['CVSS_Severity'].value_counts().to_dict(),
     "2024": df_2024['CVSS_Severity'].value_counts().to_dict()
 }

+# Final JSON structure
 overview_metrics = {
     "metadata": {
         "description": "Overview metrics summarizing vulnerability data for 2023 and 2024.",
@@ -98,11 +101,10 @@ overview_metrics = {
     }
 }

-# Save the overview_metrics to a JSON file
-with open("../../data/2024_insights/output/overview_metrics.json", "w") as f:
+# Save the overview metrics to a JSON file
+output_path = "../../data/2024_insights/output/overview_metrics.json"
+with open(output_path, "w") as f:
     json.dump(overview_metrics, f)
-
-overview_metrics
 ```

## Time-Series Metrics
@@ -185,8 +187,6 @@ time_series_metrics = {
 # Save the time_series_metrics to a JSON file
 with open("../../data/2024_insights/output/time_series_metrics.json", "w") as f:
     json.dump(time_series_metrics, f)
-
-time_series_metrics
 ```

## Vendor/Product Analysis
@@ -344,8 +344,6 @@ vendor_product = {
 # Save the vendor_product metrics to a JSON file
 with open("../../data/2024_insights/output/vendor_product_analysis.json", "w") as f:
     json.dump(vendor_product, f)
-
-vendor_product
 ```

## CISA KEV Analysis
@@ -363,10 +361,23 @@ The Known Exploited Vulnerabilities (KEV) catalog provides critical insights for
 kev_records_2023 = df_2023[df_2023['CISA_KEV'] == True].copy()
 kev_records_2024 = df_2024[df_2024['CISA_KEV'] == True].copy()

-# Ensure datetime columns are in the correct format and remove timezone info
-for df in [kev_records_2023, kev_records_2024]:
-    df['KEV_DateAdded'] = pd.to_datetime(df['KEV_DateAdded'], errors='coerce').dt.tz_localize(None)
-    df['Published_Date'] = pd.to_datetime(df['Published_Date'], errors='coerce').dt.tz_localize(None)
+
+# Function to ensure proper datetime conversion
+def ensure_datetime_conversion(df, column_name):
+    df[column_name] = pd.to_datetime(df[column_name], errors='coerce')
+    if not pd.api.types.is_datetime64_any_dtype(df[column_name]):
+        raise ValueError(f"Column {column_name} could not be converted to datetime64[ns]!")
+
+
+# Ensure datetime for all relevant columns
+for df in [df_2023, df_2024, kev_records_2023, kev_records_2024]:
+    ensure_datetime_conversion(df, 'Published_Date')
+    ensure_datetime_conversion(df, 'KEV_DateAdded')
+
+# Remove timezone info
+for df in [df_2023, df_2024, kev_records_2023, kev_records_2024]:
+    df['Published_Date'] = df['Published_Date'].dt.tz_localize(None)
+    df['KEV_DateAdded'] = df['KEV_DateAdded'].dt.tz_localize(None)

 # Group CISA KEV Data by Month for 2023 and 2024
 kev_additions = {
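As a standalone illustration of the coercion pattern the new helper relies on: `errors='coerce'` turns unparseable values into `NaT` instead of raising, and `tz_localize(None)` strips the timezone. Note `utc=True` is added here only so the synthetic mixed input lands in a single tz-aware dtype; the notebook's columns presumably parse uniformly without it.

``` python
import pandas as pd

# One valid tz-aware timestamp and one garbage value
s = pd.Series(["2024-03-01T12:00:00+00:00", "not-a-date"])
s = pd.to_datetime(s, errors="coerce", utc=True).dt.tz_localize(None)
print(s)
# 0   2024-03-01 12:00:00
# 1                   NaT
# dtype: datetime64[ns]
```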
@@ -391,7 +402,7 @@ kev_monthly_changes = {

 # Calculate NVD-KEV Overlap Percentage
 nvd_cve_counts = {
-    year: df.groupby('Published_Month').size().reindex(range(1, 13), fill_value=0)
+    year: df.groupby(df['Published_Date'].dt.month).size().reindex(range(1, 13), fill_value=0)
     for year, df in {"2023": df_2023, "2024": df_2024}.items()
 }

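The body of `kev_overlap` falls outside this hunk; a hedged sketch of the per-month overlap percentage it appears to compute, with synthetic month-indexed counts standing in for the notebook's `kev_additions` and `nvd_cve_counts`:

``` python
import pandas as pd

# Synthetic stand-ins: KEV additions and NVD publication counts by month
kev_counts = pd.Series([3, 0, 5], index=[1, 2, 3])
nvd_counts = pd.Series([120, 90, 0], index=[1, 2, 3])

# Percentage of a month's NVD CVEs that also entered KEV, guarding
# against division by zero for empty months
overlap_pct = {
    int(month): round((kev_counts.get(month, 0) / nvd_counts[month]) * 100, 2)
    if nvd_counts[month] else 0.0
    for month in nvd_counts.index
}
print(overlap_pct)  # {1: 2.5, 2: 0.0, 3: 0.0}
```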
@@ -404,53 +415,33 @@ kev_overlap = {
     for year in ["2023", "2024"]
 }

-# Top Vendors in KEV Catalog for 2023 and 2024
+# Top Vendors in KEV Catalog
 top_kev_vendors = {
     year: (
-        records[records['KEV_DateAdded'].dt.year == int(year)]
-        .groupby('KEV_Vendor')
-        .size()
-        .sort_values(ascending=False)
-        .head(10)
-        .reset_index(name='kev_count')
-        .to_dict(orient='records')
+        records.groupby('KEV_Vendor').size()
+        .sort_values(ascending=False).head(10)
+        .reset_index(name='kev_count').to_dict(orient='records')
     )
     for year, records in {"2023": kev_records_2023, "2024": kev_records_2024}.items()
 }

-# Vendor Ranking Changes (Prioritize 2024 top vendors and compare with 2023)
+# Vendor Ranking Changes
 vendor_rank_changes = []
-for vendor in {v['KEV_Vendor'] for v in top_kev_vendors['2024']}:
+all_vendors = {v['KEV_Vendor'] for v in top_kev_vendors['2024']}
+for vendor in all_vendors:
     rank_2023 = next((i + 1 for i, v in enumerate(top_kev_vendors['2023']) if v['KEV_Vendor'] == vendor), None)
-    rank_2024 = next((i + 1 for i, v in enumerate(top_kev_vendors['2024']) if v['KEV_Vendor'] == vendor), None)
-    vendor_rank_changes.append({
-        "vendor": vendor,
-        "2023_rank": rank_2023,
-        "2024_rank": rank_2024
-    })
-
-# Sort the vendor_rank_changes by 2024 rank
-vendor_rank_changes = sorted(vendor_rank_changes, key=lambda x: x["2024_rank"] if x["2024_rank"] else float('inf'))
-
-# Time to KEV Inclusion Metrics for 2023 and 2024
-time_to_kev_inclusion = {}
-for year, records in {"2023": kev_records_2023, "2024": kev_records_2024}.items():
-    inclusion_times = records[records['KEV_DateAdded'].dt.year == int(year)].copy()
-    inclusion_times['Time_To_KEV'] = (
-        inclusion_times['KEV_DateAdded'] - inclusion_times['Published_Date']
-    ).dt.days
-    inclusion_times = inclusion_times[inclusion_times['Time_To_KEV'] >= 0]
-    time_to_kev_inclusion[year] = {
-        "min_days": int(inclusion_times['Time_To_KEV'].min()) if not inclusion_times.empty else None,
-        "max_days": int(inclusion_times['Time_To_KEV'].max()) if not inclusion_times.empty else None,
-        "average_days": round(float(inclusion_times['Time_To_KEV'].mean()), 2) if not inclusion_times.empty else None
-    }
-
-# Final JSON Structure
+    rank_2024 = next((i + 1 for i, v in enumerate(top_kev_vendors['2024']) if v['KEV_Vendor'] == vendor), None)
+    vendor_rank_changes.append({
+        "vendor": vendor,
+        "2023_rank": rank_2023 or "Not Ranked",
+        "2024_rank": rank_2024 or "Not Ranked"
+    })
+
+# Prepare JSON Data
 cisa_kev = {
     "metadata": {
         "description": "Analysis of CISA KEV catalog inclusion and overlap with NVD vulnerabilities for 2023 and 2024.",
-        "generated_on": generated_date,
+        "generated_on": "2025-01-04",  # Example date, replace with actual
         "source": ["NVD", "CISA KEV"],
         "attribution": {
             "NVD": "This product uses the NVD API but is not endorsed or certified by the NVD.",
@@ -466,16 +457,14 @@ cisa_kev = {
             "note": "Percentage of NVD vulnerabilities in a month that were also added to KEV."
         },
         "top_kev_vendors": top_kev_vendors,
-        "vendor_rank_changes": vendor_rank_changes,
-        "time_to_kev_inclusion": time_to_kev_inclusion
+        "vendor_rank_changes": vendor_rank_changes
     }
 }

-# Save the cisa_kev metrics to a JSON file
-with open("../../data/2024_insights/output/cisa_kev_analysis.json", "w") as f:
+# Save JSON to File
+output_path = "../../data/2024_insights/output/cisa_kev_analysis.json"
+with open(output_path, "w") as f:
     json.dump(cisa_kev, f)
-
-cisa_kev
 ```

## Specific CVE Details
@@ -486,10 +475,16 @@ This section highlights vulnerabilities with high impact or severity to assist i
 2. **Most Impactful Vulnerabilities**: Combines multiple factors (CVSS, KEV inclusion, exploitation evidence) to rank vulnerabilities.

 ``` python
+import pandas as pd
+import json
+
+# Set the generated date for metadata
+generated_date = "2024-12-30"
+
 # Filter for all CVEs with CVSS_Base_Score of 10.0
-cvss_10_cves = df_2024[df_2024['CVSS_Base_Score'] == 10.0]
+cvss_10_cves = df_2024[df_2024['CVSS_Base_Score'] == 10.0].copy()

-# Additional CVEs to include if fewer than 25
+# Determine if additional CVEs are needed to make up a total of 25
 remaining_cves_needed = 25 - len(cvss_10_cves)
 if remaining_cves_needed > 0:
     additional_cves = (
@@ -503,40 +498,38 @@ if remaining_cves_needed > 0:
 else:
     most_severe = cvss_10_cves

-# Ensure the final result is sorted and unique
+# Sort and format most severe CVEs
 most_severe = (
     most_severe.sort_values(by=['CVSS_Base_Score', 'CVE_ID'], ascending=[False, True])
     [['CVE_ID', 'Description', 'CVSS_Base_Score', 'Vendor', 'Product']]
     .drop_duplicates()
     .to_dict(orient='records')
 )

-# Add exploitation evidence if not already present
+# Add exploitation evidence to the dataset
 df_2024['Exploitation_Evidence'] = df_2024['CVE_ID'].isin(kev_records_2024['CVE_ID'])


-# Define Impact Score calculation function
-
-
+# Define a function to calculate the Impact Score
 def calculate_impact_score(row):
     exploitation_weight = 10 if row['Exploitation_Evidence'] else 0
     impact_score = row['CVSS_Base_Score'] * 2 + exploitation_weight
     return round(impact_score, 2)


-# Calculate Impact Scores
+# Calculate Impact Scores for all CVEs
 df_2024['Impact_Score'] = df_2024.apply(calculate_impact_score, axis=1)

-# Filter for most impactful CVEs
+# Filter for the most impactful CVEs
 most_impactful = (
     df_2024.sort_values(by=['Impact_Score', 'CVE_ID'], ascending=[False, True])
     [['CVE_ID', 'Impact_Score', 'Exploitation_Evidence', 'Vendor', 'Product']]
     .drop_duplicates()
-    .head(10)  # Top 10 CVEs
+    .head(10)  # Select the top 10 CVEs
     .to_dict(orient='records')
 )

-# Final JSON structure
+# Construct the final JSON structure
 specific_cve_details = {
     "metadata": {
         "description": "Detailed analysis of the most severe and impactful CVEs for 2024.",
@@ -558,11 +551,10 @@ specific_cve_details = {
     }
 }

-# Save the specific_cve_details metrics to a JSON file
-with open("../../data/2024_insights/output/cve_details.json", "w") as f:
+# Save the specific CVE details to a JSON file
+output_path = "../../data/2024_insights/output/cve_details.json"
+with open(output_path, "w") as f:
     json.dump(specific_cve_details, f)
-
-specific_cve_details
 ```

## CVE Assigner Analysis
@@ -575,7 +567,6 @@ This section identifies the organizations assigning the most CVEs to understand
 ``` python
 # Group by CVE Assigner for 2023 and 2024 with severity breakdown and total counts

-
 def get_top_assigners_with_totals(df, year):
     assigner_data = (
         df.groupby('CVE_Assigner')
@@ -651,6 +642,4 @@ top_assigners = {
 # Save the top_assigners metrics to a JSON file
 with open("../../data/2024_insights/output/top_assigners.json", "w") as f:
     json.dump(top_assigners, f)
-
-top_assigners
 ```
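With the trailing display expressions (`overview_metrics`, `cisa_kev`, `top_assigners`, ...) removed, the notebook no longer echoes its artifacts; a minimal round-trip read, assuming the output files exist, is one way to confirm the JSON is well formed:

``` python
import json

# Hypothetical spot check; point it at any of the emitted files
with open("../../data/2024_insights/output/top_assigners.json") as f:
    data = json.load(f)
print(sorted(data.keys()))
```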