Skip to content

Commit f9705a1

Browse files
Added new file extensions and 3 additional web elements to search
1 parent 214600c commit f9705a1

File tree

1 file changed

+48
-2
lines changed

1 file changed

+48
-2
lines changed

pagesearch/pagesearch_parsers.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,29 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
7070
total_emails.append(emails)
7171
if not emails:
7272
emails = ['None']
73+
hidden_inputs = soup.find_all(type='hidden')
74+
search_query_input = soup.find('input', {'name': 'q'})
75+
customization_input = soup.find('input', {'name': 'language'})
76+
passwords = soup.find_all('input', {'type': 'password'})
7377
print(Fore.GREEN + "Page URL: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{url}" + Style.RESET_ALL)
7478
print(Fore.GREEN + "Page title: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{title}" + Style.RESET_ALL)
7579
print(Fore.GREEN + "Founded e-mails: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{', '.join(emails)}" + Style.RESET_ALL)
80+
81+
if customization_input:
82+
print(Fore.GREEN + "Found site customization setting: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{customization_input.get('value')}" + Style.RESET_ALL)
83+
if search_query_input:
84+
print(Fore.GREEN + "Found search query: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{search_query_input.get('value')}" + Style.RESET_ALL)
85+
for hidden_input in hidden_inputs:
86+
print(Fore.GREEN + "Found hidden form data: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{hidden_input.get('value')}" + Style.RESET_ALL)
87+
for password in passwords:
88+
if password is not None:
89+
print(Fore.GREEN + "Found exposed password: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{password.get('value')}" + Style.RESET_ALL)
90+
7691
links = soup.find_all('a')
7792
for link in links:
7893
href = link.get('href')
7994
if href:
80-
if href.lower().endswith(('.docx', '.xlsx', '.csv', '.pdf', '.pptx', '.doc', '.ppt', '.xls', '.rtf')):
95+
if href.lower().endswith(('.docx', '.xlsx', '.csv', '.pdf', '.pptx', '.doc', '.ppt', '.xls', '.rtf', '.conf', '.config', '.db', '.sql', '.json', '.txt')):
8196
document_url = 'http://' + url + href
8297
print(Fore.GREEN + "Found document: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{document_url}" + Style.RESET_ALL)
8398
response = requests.get(document_url)
@@ -152,11 +167,40 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
152167
file.write(response.content)
153168
files_counter += 1
154169
print(Fore.GREEN + "File was successfully saved")
170+
elif href and href.lower().endswith(('.sql')):
171+
filename = os.path.basename(href)
172+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.sql")
173+
with open(extracted_path, 'wb') as file:
174+
file.write(response.content)
175+
files_counter += 1
176+
print(Fore.GREEN + "File was successfully saved")
177+
elif href and href.lower().endswith(('.db')):
178+
filename = os.path.basename(href)
179+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.db")
180+
with open(extracted_path, 'wb') as file:
181+
file.write(response.content)
182+
files_counter += 1
183+
print(Fore.GREEN + "File was successfully saved")
184+
elif href and href.lower().endswith(('.config')):
185+
filename = os.path.basename(href)
186+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.config")
187+
with open(extracted_path, 'wb') as file:
188+
file.write(response.content)
189+
files_counter += 1
190+
print(Fore.GREEN + "File was successfully saved")
191+
elif href and href.lower().endswith(('.conf')):
192+
filename = os.path.basename(href)
193+
extracted_path = os.path.join(ps_docs_path, f"extracted_{os.path.splitext(filename)[0]}.conf")
194+
with open(extracted_path, 'wb') as file:
195+
file.write(response.content)
196+
files_counter += 1
197+
print(Fore.GREEN + "File was successfully saved")
155198
print(Fore.LIGHTGREEN_EX + "-------------------------------------------------")
156199
except Exception as e:
157200
print(Fore.RED + "File extraction failed. Reason: {}".format(e) + Style.RESET_ALL)
158201
print(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)
159202
pass
203+
160204
ps_emails_list = [x for x in total_emails if x]
161205
ps_emails_return = [', '.join(sublist) for sublist in ps_emails_list]
162206
clean_bad_pdfs(ps_docs_path)
@@ -172,7 +216,7 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
172216
elif keywords_flag == 0:
173217
print(Fore.RED + "Keywords gathering won't start because of None user input" + Style.RESET_ALL)
174218
print(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)
175-
print(Fore.GREEN + f"\nDuring PageSearch process:\n[+] Total {len(subdomains_list)} subdomains were checked")
219+
print(Fore.GREEN + f"\nDuring subdomains analysis:\n[+] Total {len(subdomains_list)} subdomains were checked")
176220
print(Fore.GREEN + f"[+] Among them, {accessible_subdomains} subdomains were accessible")
177221
print(Fore.GREEN + f"[+] In result, {len(ps_emails_return)} unique e-mail addresses were found")
178222
print(Fore.GREEN + f"[+] Also, {files_counter} files were extracted")
@@ -181,3 +225,5 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
181225
else:
182226
print(Fore.GREEN + f"[+] Total {pdf_with_keywords} keywords were found in PDF files")
183227
return ps_emails_return
228+
229+

0 commit comments

Comments (0)