Skip to content

Commit 6d14a94

Browse files
Normalized subdomains emails output for PDF report (part of #42)
1 parent 53bf777 commit 6d14a94

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

datagather_modules/crawl_processor.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,12 @@ def subdomains_mail_gather(url):
49 49
r = requests.get(url)
50 50
data = r.text
51 51
soup = BeautifulSoup(data, "html.parser")
52-
mails = []
52+
mails_uncleaned = []
53 53
for i in soup.find_all(href=re.compile("mailto")):
54 54
i.encode().decode()
55-
mails.append(i.string)
55+
mails_uncleaned.append(i.string)
56+
mails_cleaned = [item for item in mails_uncleaned if item is not None]
57+
mails = [''.join(sublist) for sublist in mails_cleaned]
56 58
return mails
57 59
except requests.RequestException as e:
58 60
print(Fore.RED + "Error while gathering e-mails. Reason: {}".format(e))
@@ -165,6 +167,7 @@ def domains_reverse_research(subdomains, report_file_type):
165 167

166 168
subdomain_mails = [sublist for sublist in subdomain_mails if sublist]
167 169
subdomain_mails = [sublist for sublist in subdomain_mails if sublist != [None]]
170+
subdomain_mails = list(map(''.join, subdomain_mails))
168 171
subdomain_socials = [{k: v for k, v in d.items() if v} for d in subdomain_socials]
169 172
subdomain_socials = [d for d in subdomain_socials if d]
170 173
subdomain_socials_grouped = defaultdict(list)

0 commit comments

Comments (0)