Skip to content

Commit 7efcbc5

Browse files
Added a counter for found keywords
1 parent 025e58d commit 7efcbc5

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

pagesearch/pagesearch_parsers.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ def find_keywords_in_pdfs(ps_docs_path, keywords: list) -> dict:
2020
try:
2121
pdf_files = [f for f in os.listdir(ps_docs_path) if f.lower().endswith(".pdf")]
2222
results = {}
23+
pdf_with_keywords = 0
2324
for pdf_file in pdf_files:
2425
pdf_path = os.path.join(ps_docs_path, pdf_file)
2526
extracted_text = extract_text_from_pdf(pdf_path)
@@ -28,7 +29,8 @@ def find_keywords_in_pdfs(ps_docs_path, keywords: list) -> dict:
2829
if pdf_file not in results:
2930
results[pdf_file] = []
3031
results[pdf_file].append(keyword)
31-
return results
32+
pdf_with_keywords += 1
33+
return results, pdf_with_keywords
3234
except Exception as e:
3335
print(Fore.RED + f"Can't find keywords. Reason: {e}")
3436
pass
@@ -162,7 +164,7 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
162164
if keywords_flag == 1:
163165
print(Fore.GREEN + "Searching keywords in PDF files..." + Style.RESET_ALL)
164166
try:
165-
pdf_results = find_keywords_in_pdfs(ps_docs_path, keywords)
167+
pdf_results, pdf_with_keywords = find_keywords_in_pdfs(ps_docs_path, keywords)
166168
for pdf_file, found_keywords in pdf_results.items():
167169
print(Fore.GREEN + f"Keywords " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{', '.join(found_keywords)}" + Style.RESET_ALL + Fore.GREEN + f" found in '{pdf_file}'" + Style.RESET_ALL)
168170
except Exception as e:
@@ -174,4 +176,8 @@ def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
174176
print(Fore.GREEN + f"[+] Among them, {accessible_subdomains} subdomains were accessible")
175177
print(Fore.GREEN + f"[+] In result, {len(ps_emails_return)} unique e-mail addresses were found")
176178
print(Fore.GREEN + f"[+] Also, {files_counter} files were extracted")
179+
if keywords_flag == 0:
180+
print(Fore.GREEN + "[+] Keywords were not gathered because of None user input")
181+
else:
182+
print(Fore.GREEN + f"[+] Total {pdf_with_keywords} keywords were found in PDF files")
177183
return ps_emails_return

0 commit comments

Comments
 (0)