@@ -55,12 +55,14 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
55
55
if not os .path .exists (ps_docs_path ):
56
56
os .makedirs (ps_docs_path )
57
57
email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
58
+ total_emails = []
58
59
for url in subdomains_list :
59
60
try :
60
61
response = requests .get ('http://' + url )
61
62
soup = BeautifulSoup (response .content , 'html.parser' )
62
63
title = soup .title .string
63
64
emails = re .findall (email_pattern , soup .text )
65
+ total_emails .append (emails )
64
66
if not emails :
65
67
emails = ['None' ]
66
68
print (Fore .GREEN + "Page URL: " + Fore .LIGHTCYAN_EX + Style .BRIGHT + f"{ url } " + Style .RESET_ALL )
@@ -70,7 +72,6 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
70
72
for link in links :
71
73
href = link .get ('href' )
72
74
if href :
73
- #print(f"Found link: {href}") # Debugging line
74
75
if href .lower ().endswith (('.docx' , '.xlsx' , '.csv' , '.pdf' , '.pptx' , '.doc' , '.ppt' , '.xls' , '.rtf' )):
75
76
document_url = 'http://' + url + href
76
77
print (Fore .GREEN + "Found document: " + Fore .LIGHTCYAN_EX + Style .BRIGHT + f"{ document_url } " + Style .RESET_ALL )
@@ -141,6 +142,9 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
141
142
print (Fore .RED + "File extraction failed. Reason: {}" .format (e ) + Style .RESET_ALL )
142
143
print (Fore .LIGHTGREEN_EX + "-------------------------------------------------" + Style .RESET_ALL )
143
144
pass
145
+ ps_emails_list = [x for x in total_emails if x ]
146
+ ps_emails_return = [', ' .join (sublist ) for sublist in ps_emails_list ]
147
+ #print(ps_emails_return)
144
148
clean_bad_pdfs (ps_docs_path )
145
149
if keywords_flag == 1 :
146
150
print (Fore .GREEN + "Starting keywords searching in PDF files" + Style .RESET_ALL )
@@ -153,3 +157,4 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
153
157
elif keywords_flag == 0 :
154
158
print (Fore .RED + "Keywords gathering won't start because of None user input" + Style .RESET_ALL )
155
159
print (Fore .LIGHTGREEN_EX + "-------------------------------------------------" + Style .RESET_ALL )
160
+ return ps_emails_return
0 commit comments