Skip to content

Commit f9705a1

Browse files
Added new file extensions and 3 additional web elements to search
1 parent 214600c commit f9705a1

File tree

1 file changed

+48
-2
lines changed

1 file changed

+48
-2
lines changed

pagesearch/pagesearch_parsers.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,29 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
7070
total_emails.append(emails)
7171
if not emails:
7272
emails = ['None']
73+
hidden_inputs = soup.find_all(type='hidden')
74+
search_query_input = soup.find('input', {'name': 'q'})
75+
customization_input = soup.find('input', {'name': 'language'})
76+
passwords = soup.find_all('input', {'type': 'password'})
7377
print(Fore.GREEN + "Page URL: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{url}" + Style.RESET_ALL)
7478
print(Fore.GREEN + "Page title: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{title}" + Style.RESET_ALL)
7579
print(Fore.GREEN + "Founded e-mails: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{', '.join(emails)}" + Style.RESET_ALL)
80+
81+
if customization_input:
82+
print(Fore.GREEN + "Found site customization setting: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{customization_input.get('value')}" + Style.RESET_ALL)
83+
if search_query_input:
84+
print(Fore.GREEN + "Found search query: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{search_query_input.get('value')}" + Style.RESET_ALL)
85+
for hidden_input in hidden_inputs:
86+
print(Fore.GREEN + "Found hidden form data: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{hidden_input.get('value')}" + Style.RESET_ALL)
87+
for password in passwords:
88+
if password is not None:
89+
print(Fore.GREEN + "Found exposed password: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{password.get('value')}" + Style.RESET_ALL)
90+
7691
links = soup.find_all('a')
7792
for link in links:
7893
href = link.get('href')
7994
if href:
80-
if href.lower().endswith(('.docx', '.xlsx', '.csv', '.pdf', '.pptx', '.doc', '.ppt', '.xls', '.rtf')):
95+
if href.lower().endswith(('.docx', '.xlsx', '.csv', '.pdf', '.pptx', '.doc', '.ppt', '.xls', '.rtf', '.conf', '.config', '.db', '.sql', '.json', '.txt')):
8196
document_url = 'http://' + url + href
8297
print(Fore.GREEN + "Found document: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{document_url}" + Style.RESET_ALL)
8398
response = requests.get(document_url)
@@ -152,11 +167,40 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
152167
file.write(response.content)
153168
files_counter += 1
154169
print(Fore.GREEN + "File was successfully saved")
170+
elif href and href.lower().endswith(('.sql')):
171+
filename = os.path.basename(href)
172+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.sql")
173+
with open(extracted_path, 'wb') as file:
174+
file.write(response.content)
175+
files_counter += 1
176+
print(Fore.GREEN + "File was successfully saved")
177+
elif href and href.lower().endswith(('.db')):
178+
filename = os.path.basename(href)
179+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.db")
180+
with open(extracted_path, 'wb') as file:
181+
file.write(response.content)
182+
files_counter += 1
183+
print(Fore.GREEN + "File was successfully saved")
184+
elif href and href.lower().endswith(('.config')):
185+
filename = os.path.basename(href)
186+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.config")
187+
with open(extracted_path, 'wb') as file:
188+
file.write(response.content)
189+
files_counter += 1
190+
print(Fore.GREEN + "File was successfully saved")
191+
elif href and href.lower().endswith(('.conf')):
192+
filename = os.path.basename(href)
193+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.conf")
194+
with open(extracted_path, 'wb') as file:
195+
file.write(response.content)
196+
files_counter += 1
197+
print(Fore.GREEN + "File was successfully saved")
155198
print(Fore.LIGHTGREEN_EX + "-------------------------------------------------")
156199
except Exception as e:
157200
print(Fore.RED + "File extraction failed. Reason: {}".format(e) + Style.RESET_ALL)
158201
print(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)
159202
pass
203+
160204
ps_emails_list = [x for x in total_emails if x]
161205
ps_emails_return = [', '.join(sublist) for sublist in ps_emails_list]
162206
clean_bad_pdfs(ps_docs_path)
@@ -172,7 +216,7 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
172216
elif keywords_flag == 0:
173217
print(Fore.RED + "Keywords gathering won't start because of None user input" + Style.RESET_ALL)
174218
print(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)
175-
print(Fore.GREEN + f"\nDuring PageSearch process:\n[+] Total {len(subdomains_list)} subdomains were checked")
219+
print(Fore.GREEN + f"\nDuring subdomains analysis:\n[+] Total {len(subdomains_list)} subdomains were checked")
176220
print(Fore.GREEN + f"[+] Among them, {accessible_subdomains} subdomains were accessible")
177221
print(Fore.GREEN + f"[+] In result, {len(ps_emails_return)} unique e-mail addresses were found")
178222
print(Fore.GREEN + f"[+] Also, {files_counter} files were extracted")
@@ -181,3 +225,5 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
181225
else:
182226
print(Fore.GREEN + f"[+] Total {pdf_with_keywords} keywords were found in PDF files")
183227
return ps_emails_return
228+
229+

0 commit comments

Comments (0)