|
| 1 | +import requests |
| 2 | +from bs4 import BeautifulSoup |
| 3 | +import re |
| 4 | +from colorama import Fore, Style |
| 5 | +import os |
# Lower-case extensions of the document types that are downloaded.
_DOCUMENT_EXTENSIONS = ('.docx', '.xlsx', '.csv', '.pdf', '.pptx', '.doc', '.ppt', '.xls', '.rtf')

# Seconds to wait on a server, so a single unresponsive host cannot hang the scan.
_REQUEST_TIMEOUT = 10


def _save_document(content, href, folder, counter):
    """Write downloaded bytes into *folder* and return the linked file's base name.

    The stored name is ``extracted_<counter>_<stem><extension>``, where the
    extension is the lower-case entry of ``_DOCUMENT_EXTENSIONS`` that *href*
    ends with (the caller guarantees that one matches).
    """
    filename = os.path.basename(href)
    lowered = href.lower()
    extension = next(ext for ext in _DOCUMENT_EXTENSIONS if lowered.endswith(ext))
    stem = os.path.splitext(filename)[0]
    target = os.path.join(folder, f"extracted_{counter}_{stem}{extension}")
    with open(target, 'wb') as file:
        file.write(content)
    return filename


def subdomains_parser(subdomains_list, report_folder):
    """Fetch each subdomain's page, print its details and download linked documents.

    For every host in *subdomains_list* the page at ``http://<host>`` is
    requested and parsed. The page URL, its title and any e-mail addresses
    found in the page text are printed. Every ``<a href>`` whose target ends
    with one of ``_DOCUMENT_EXTENSIONS`` is downloaded into
    ``<report_folder>/ps_documents`` (created if missing).

    Args:
        subdomains_list: Iterable of host names without a scheme.
        report_folder: Directory under which ``ps_documents`` is created.

    Returns:
        None. Results are reported on stdout and as files on disk.

    Failures are reported on stdout and never raised: a failing page skips
    to the next host, a failing document skips to the next link.
    """
    # Function-scope import: the original block did not use urllib.
    from urllib.parse import urljoin

    ps_docs_path = os.path.join(report_folder, 'ps_documents')
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(ps_docs_path, exist_ok=True)

    email_pattern = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    # Incremented after every saved file so equally named documents from
    # different pages do not overwrite each other.
    counter = 1

    for url in subdomains_list:
        page_url = 'http://' + url
        try:
            response = requests.get(page_url, timeout=_REQUEST_TIMEOUT)
            soup = BeautifulSoup(response.content, 'html.parser')
            # Pages without a <title> tag must not abort the whole page scan.
            title = soup.title.string if soup.title else None
            emails = email_pattern.findall(soup.text)
            print(Fore.GREEN + f"Page URL: {url}" + Style.RESET_ALL)
            print(Fore.GREEN + f"Page title: {title}" + Style.RESET_ALL)
            print(Fore.GREEN + f"Founded e-mails: {', '.join(emails)}" + Style.RESET_ALL)

            for link in soup.find_all('a'):
                href = link.get('href')
                if not href or not href.lower().endswith(_DOCUMENT_EXTENSIONS):
                    continue
                # urljoin resolves absolute hrefs and relative hrefs with or
                # without a leading slash; plain concatenation breaks on both.
                document_url = urljoin(page_url + '/', href)
                print(Fore.GREEN + f"Found document: {document_url}" + Style.RESET_ALL)
                try:
                    doc_response = requests.get(document_url, timeout=_REQUEST_TIMEOUT)
                    if doc_response.status_code == 200:
                        filename = _save_document(doc_response.content, href, ps_docs_path, counter)
                        counter += 1
                        print(Fore.GREEN + f"File {filename} was successfully saved" + Style.RESET_ALL)
                    else:
                        print(Fore.RED + "Error" + Style.RESET_ALL)
                except (requests.RequestException, OSError) as e:
                    # One broken document must not stop the remaining links.
                    print(Fore.RED + "Error {}".format(e) + Style.RESET_ALL)
            print(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)
        except Exception as e:
            # Deliberate best-effort boundary: report and move on to the next host.
            print(Fore.RED + "Error {}".format(e) + Style.RESET_ALL)
0 commit comments