Skip to content

Commit ca343ec

Browse files
committed
improve logs and cli usage
1 parent 142fa79 commit ca343ec

File tree

6 files changed

+159
-178
lines changed

6 files changed

+159
-178
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@ share/python-wheels/
3030
*.egg
3131
MANIFEST
3232

33+
output/
34+
3335
# PyInstaller
3436
# Usually these files are written by a python script from a template
3537
# before PyInstaller builds the exe, so as to inject date/other infos into it.

classification.py

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,18 +7,26 @@
77
from typing import List
88
from ai_prompts import classification_prompt
99
import math
10+
from colorama import Fore, Style
1011

1112
load_dotenv()
1213
api_key = os.getenv('GEMINI_API_KEY')
13-
if not api_key:
14-
raise ValueError("GEMINI_API_KEY não encontrada no arquivo .env")
14+
use_ai = True
1515

16-
genai.configure(api_key=api_key)
17-
model = genai.GenerativeModel("gemini-2.0-flash-exp")
16+
if not api_key:
17+
print(Fore.YELLOW + "[theWatcher] GEMINI_API_KEY not found in .env file. AI classification disabled." + Style.RESET_ALL)
18+
use_ai = False
19+
else:
20+
try:
21+
genai.configure(api_key=api_key)
22+
model = genai.GenerativeModel("gemini-2.0-flash-exp")
23+
except Exception as e:
24+
print(Fore.YELLOW + f"[theWatcher] Error configuring AI model: {e}. AI classification disabled." + Style.RESET_ALL)
25+
use_ai = False
1826

19-
def classify_fd_titles(titles: List[str], batch_size: int = 20, use_ai: bool = True) -> List[dict]:
20-
if not use_ai or not api_key:
21-
print("AI classification disabled. Including all titles.")
27+
def classify_fd_titles(titles: List[str], month: str, batch_size: int = 20) -> List[dict]:
28+
if not use_ai:
29+
print(Fore.YELLOW + "[theWatcher] AI classification disabled. Skipping filtering." + Style.RESET_ALL)
2230
return [{"index": i, "title": t, "is_vulnerability": True} for i, t in enumerate(titles)]
2331

2432
results = []
@@ -30,10 +38,10 @@ def classify_fd_titles(titles: List[str], batch_size: int = 20, use_ai: bool = T
3038
batch_num = i // batch_size + 1
3139
if current_batch != batch_num:
3240
current_batch = batch_num
33-
print(f"[Classification] Processing batch {batch_num}/{total_batches}")
41+
print(Fore.BLUE + f"[theWatcher] Filtering items with AI (batch {batch_num}/{total_batches}) - month: {month}" + Style.RESET_ALL)
3442

3543
batch = [{"index": j, "title": titles[j]} for j in range(i, min(i+batch_size, len(titles)))]
36-
44+
3745
tries = 0
3846
while tries < 3:
3947
tries += 1

main.py

Lines changed: 30 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
11
import argparse
22
import json
33
from datetime import datetime, timedelta
4+
import time
45
from dateutil.parser import parse
56
from summarizer import summarize_vulnerabilities
67
import os
7-
from scrapers.sources import get_full_disclosure_latest, get_exploitdb_rss
8-
from scrapers.nist import get_nist_cves
9-
from scrapers.nist import Severity
8+
from scrapers.sources import get_full_disclosure_latest, get_exploitdb_rss, get_nist_cves, Severity
9+
from colorama import init, Fore, Style
10+
import sys
1011

1112
def parse_args():
1213
parser = argparse.ArgumentParser(
@@ -50,8 +51,8 @@ def parse_args():
5051

5152
# Limit options
5253
limit_group = parser.add_argument_group('Limits')
53-
limit_group.add_argument('--max-items', '-m', type=int,
54-
help='Maximum number of vulnerabilities to retrieve')
54+
limit_group.add_argument('--max-items', '-m', type=int, default=100,
55+
help='Maximum number of vulnerabilities to retrieve per source')
5556
limit_group.add_argument('--min-severity', '-M',
5657
choices=['LOW', 'MEDIUM', 'HIGH', 'CRITICAL'],
5758
help='Minimum severity level for vulnerabilities')
@@ -74,14 +75,11 @@ def parse_args():
7475
args.type = 'all'
7576
args.collect = True
7677
args.summarize = True
77-
args.days = 30
78-
# Somente sobrescrever max_items se o usuário não tiver definido -m
79-
if not args.max_items:
80-
args.max_items = None
81-
args.include_unclassified = True
78+
args.days = args.days or 30
79+
args.include_unclassified = args.include_unclassified or False
8280

8381
if args.quick_scan:
84-
args.type = 'sources'
82+
args.type = 'all'
8583
args.collect = True
8684
args.summarize = True
8785
args.days = 7
@@ -125,28 +123,20 @@ def collect_vulnerabilities(args):
125123
max_items = args.max_items
126124
source_type = 'all' if args.type == 'all' else ('nist' if args.type == 'nist' else 'sources')
127125

128-
# Calculate max items per source
129-
if args.type == 'all' and max_items:
130-
active_sources = len(args.sources) if 'nist' not in args.sources else len(args.sources) + 1
131-
max_per_source = max_items // active_sources
132-
else:
133-
max_per_source = max_items
134-
135-
print(f"Total limit: {max_items}, Per source limit: {max_per_source}")
126+
print(f"Total limit per source: {max_items}")
136127

137128
if args.type in ['sources', 'all']:
138129
print(f"Collecting items from sources between {args.start_date.strftime('%Y-%m-%d')} and {args.end_date.strftime('%Y-%m-%d')}...")
139130

140131
if 'fulldisclosure' in args.sources:
141132
fd_vulns = get_full_disclosure_latest(args.start_date, args.end_date, use_ai=not args.no_ai)
142-
fd_vulns = fd_vulns[:max_per_source] if max_per_source else fd_vulns
133+
fd_vulns = fd_vulns[:max_items] if max_items else fd_vulns
143134
vulns.extend(fd_vulns)
144135
print(f"Full Disclosure results: {len(fd_vulns)}")
145136

146137
if 'exploitdb' in args.sources:
147-
remaining = max_items - len(vulns) if max_items else None
148138
edb_vulns = get_exploitdb_rss(args.start_date, args.end_date)
149-
edb_vulns = edb_vulns[:max_per_source] if max_per_source else edb_vulns
139+
edb_vulns = edb_vulns[:max_items] if max_items else edb_vulns
150140
vulns.extend(edb_vulns)
151141
print(f"Exploit-DB results: {len(edb_vulns)}")
152142

@@ -157,23 +147,30 @@ def collect_vulnerabilities(args):
157147
start_date=args.start_date,
158148
end_date=args.end_date,
159149
classified_only=not args.include_unclassified,
160-
max_cves=max_per_source, # Usa o mesmo limite por fonte
150+
max_cves=max_items, # Apply the limit per source
161151
min_severity=args.min_severity
162152
)
163153
vulns.extend(nist_vulns)
164154
print(f"NIST CVE results: {len(nist_vulns)}")
165155

166-
# Aplica limite final ao total
167-
if max_items and len(vulns) > max_items:
168-
vulns = vulns[:max_items]
169-
print(f"Applied final limit: {len(vulns)}/{max_items} vulnerabilities")
170-
171156
output_file = f"{args.output_dir}/{source_type}_vulnerabilities.json"
172157
save_to_json(vulns, output_file)
173158
print(f"Data saved to {output_file}")
174159
return vulns, source_type
175160

176161
def main():
162+
init(autoreset=True)
163+
print(Fore.GREEN + r"""
164+
_ _ __ __ _ _
165+
| | | | \ \ / / | | | |
166+
| |_| |__ __\ \ /\ / /_ _| |_ ___| |__ ___ _ __
167+
| __| '_ \ / _ \ \/ \/ / _` | __/ __| '_ \ / _ \ '__|
168+
| |_| | | | __/\ /\ / (_| | || (__| | | | __/ |
169+
\__|_| |_|\___| \/ \/ \__,_|\__\___|_| |_|\___|_|
170+
171+
""", Style.RESET_ALL)
172+
print(Fore.BLUE + "[theWatcher] Starting theWatcher..." + Style.RESET_ALL)
173+
177174
args = parse_args()
178175

179176
if not args.collect and not args.summarize:
@@ -184,9 +181,11 @@ def main():
184181
source_type = 'sources' # default
185182

186183
if args.collect:
184+
print(Fore.CYAN + "[theWatcher] Collecting vulnerabilities..." + Style.RESET_ALL)
187185
vulns, source_type = collect_vulnerabilities(args)
188-
186+
189187
if args.summarize:
188+
print(Fore.CYAN + "[theWatcher] Summarizing vulnerabilities..." + Style.RESET_ALL)
190189
if not args.no_ai:
191190
try:
192191
input_file = f"{args.output_dir}/{source_type}_vulnerabilities.json"
@@ -197,6 +196,8 @@ def main():
197196
print(f"Expected input file: {input_file}")
198197
else:
199198
print("Summarization skipped (AI disabled)")
199+
200+
print(Fore.GREEN + "[theWatcher] Done." + Style.RESET_ALL)
200201

201202
if __name__ == "__main__":
202203
main()

scrapers/nist.py

Lines changed: 0 additions & 133 deletions
This file was deleted.

0 commit comments

Comments
 (0)