
Commit fde59b8

bitbucket added

1 parent 90364d9 commit fde59b8

File tree

3 files changed: +189 −2 lines changed

theHarvester/__main__.py
theHarvester/discovery/bitbucket.py
theHarvester/lib/core.py

theHarvester/__main__.py

Lines changed: 17 additions & 2 deletions
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 import argparse
 import asyncio
 import os
@@ -17,6 +16,7 @@
     api_endpoints,
     baidusearch,
     bevigil,
+    bitbucket,
     bravesearch,
     bufferoverun,
     builtwith,
@@ -167,7 +167,7 @@ async def start(rest_args: argparse.Namespace | None = None):
     parser.add_argument(
         '-b',
         '--source',
-        help="""baidu, bevigil, brave, bufferoverun,
+        help="""baidu, bevigil, bitbucket, brave, bufferoverun,
                 builtwith, censys, certspotter, chaos, commoncrawl, criminalip, crtsh, dehashed, dnsdumpster, duckduckgo, fofa, fullhunt, github-code,
                 gitlab, hackertarget, haveibeenpwned, hudsonrock, hunter, hunterhow, intelx, leakix, leaklookup, netlas, onyphe, otx, pentesttools,
                 projectdiscovery, rapiddns, robtex, rocketreach, securityscorecard, securityTrails, shodan, subdomaincenter,
@@ -417,6 +417,21 @@ async def store(
                     )
                 except Exception as e:
                     show_default_error_message(engineitem, word, error=e)
+            elif engineitem == 'bitbucket-code':
+                try:
+                    bitbucket_search = bitbucket.SearchBitBucketCode(word, limit)
+                    stor_lst.append(
+                        store(
+                            bitbucket_search,
+                            engineitem,
+                            store_host=True,
+                            store_emails=True,
+                        )
+                    )
+                except MissingKey as ex:
+                    if not args.quiet:
+                        print(f'A Missing Key error occurred in bitbucket: {ex}')
+
 
             elif engineitem == 'brave':
                 try:
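
With this wiring in place, the new engine is selected like any other source. A minimal invocation sketch (not part of the commit, assuming the repo's usual theHarvester.py entry point; 'username/repo' is a hypothetical target, since the new module's comment says the search word must contain username and repo):

    python3 theHarvester.py -d username/repo -b bitbucket-code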
theHarvester/discovery/bitbucket.py

Lines changed: 168 additions & 0 deletions

@@ -0,0 +1,168 @@
+import asyncio
+import random
+import urllib.parse as urlparse
+from typing import Any, NamedTuple
+
+import aiohttp
+
+from theHarvester.discovery.constants import MissingKey, get_delay
+from theHarvester.lib.core import Core
+from theHarvester.parsers import myparser
+
+
+class RetryResult(NamedTuple):
+    time: float
+
+
+class SuccessResult(NamedTuple):
+    fragments: list[str]
+    next_page: int
+    last_page: int
+
+
+class ErrorResult(NamedTuple):
+    status_code: int
+    body: Any
+
+
+class SearchBitBucketCode:
+    def __init__(self, word, limit) -> None:
+        try:
+            self.word = word
+            self.total_results = ''
+            self.server = 'api.bitbucket.org'
+            self.limit = limit
+            self.counter = 0
+            self.page = 1
+            self.key = Core.bitbucket_key()
+            if self.key is None:
+                raise MissingKey('BitBucket')
+            self.proxy = False
+            self.base_url = f'https://{self.server}/2.0/repositories/{self.word}/src'  # Word must contain username and repo
+            self.headers = {
+                'Host': self.server,
+                'User-agent': Core.get_user_agent(),
+                'Authorization': f'token {self.key}',
+            }
+            # Retry control to avoid infinite loops on rate limiting
+            self.retry_count = 0
+            self.max_retries = 3
+        except Exception as e:
+            print(f'Error initializing SearchBitBucketCode: {e}')
+            raise
+
+    @staticmethod
+    async def fragments_from_response(json_data: dict) -> list[str]:
+        try:
+            return [
+                match['fragment']
+                for item in json_data.get('items', [])
+                for match in item.get('text_matches', [])
+                if match.get('fragment') is not None
+            ]
+        except Exception as e:
+            print(f'Error extracting fragments: {e}')
+            return []
+
+    @staticmethod
+    async def page_from_response(page: str, links) -> int | None:
+        try:
+            if page_link := links.get(page):
+                parsed = urlparse.urlparse(str(page_link.get('url')))
+                if page_param := urlparse.parse_qs(parsed.query).get('page', [None])[0]:
+                    return int(page_param)
+            return 0
+        except Exception as e:
+            print(f'Error parsing page response: {e}')
+            return None
+
+    async def handle_response(self, response: tuple[str, dict, int, Any]) -> ErrorResult | RetryResult | SuccessResult:
+        try:
+            text, json_data, status, links = response
+            if status == 200:
+                results = await self.fragments_from_response(json_data)
+                # Ensure next_page and last_page default to 0 if None
+                next_page = await self.page_from_response('next', links) or 0
+                last_page = await self.page_from_response('last', links) or 0
+                return SuccessResult(results, next_page, last_page)
+            if status in (429, 403):
+                return RetryResult(60)
+            return ErrorResult(status, json_data if isinstance(json_data, dict) else text)
+        except Exception as e:
+            print(f'Error handling response: {e}')
+            return ErrorResult(500, str(e))
+
+    @staticmethod
+    async def next_page_or_end(result: SuccessResult) -> int | None:
+        if result.next_page is not None:
+            return result.next_page
+        else:
+            return result.last_page
+
+    async def do_search(self, page: int) -> tuple[str, dict, int, Any]:
+        try:
+            url = f'{self.base_url}?page={page}' if page else self.base_url
+            async with aiohttp.ClientSession(headers=self.headers) as sess:
+                async with sess.get(url, proxy=random.choice(Core.proxy_list()) if self.proxy else None) as resp:
+                    return await resp.text(), await resp.json(), resp.status, resp.links
+        except Exception as e:
+            print(f'Error performing search: {e}')
+            return '', {}, 500, {}
+
+    async def process(self, proxy: bool = False) -> None:
+        try:
+            self.proxy = proxy
+            while self.counter <= self.limit and self.page != 0:
+                try:
+                    api_response = await self.do_search(self.page)
+                    result = await self.handle_response(api_response)
+
+                    if isinstance(result, SuccessResult):
+                        # Reset retry counter on any successful response
+                        self.retry_count = 0
+                        print(f'\tSearching {self.counter} results.')
+                        self.total_results += ''.join(result.fragments)
+                        self.counter += len(result.fragments)
+                        next_or_last = result.next_page or result.last_page
+                        # Break if pagination does not advance to avoid infinite loop
+                        if next_or_last == self.page:
+                            print('\tNo page advancement detected; exiting to avoid infinite loop.')
+                            self.page = 0
+                            break
+                        self.page = next_or_last
+                        await asyncio.sleep(get_delay())
+                    elif isinstance(result, RetryResult):
+                        self.retry_count += 1
+                        if self.retry_count > self.max_retries:
+                            print('\tMaximum retries reached; exiting to avoid infinite loop.')
+                            self.page = 0
+                            break
+                        sleepy_time = get_delay() + result.time
+                        print(f'\tRetrying page in {sleepy_time} seconds...')
+                        await asyncio.sleep(sleepy_time)
+                    else:
+                        # On error, stop to avoid endless retries on a bad state
+                        print(f'\tException occurred: status_code: {result.status_code} reason: {result.body}')
+                        self.page = 0
+                        break
+                except Exception as e:
+                    print(f'Error processing page: {e}')
+                    await asyncio.sleep(get_delay())
+        except Exception as e:
+            print(f'An exception has occurred in bitbucketcode process: {e}')
+
+    async def get_emails(self):
+        try:
+            rawres = myparser.Parser(self.total_results, self.word)
+            return await rawres.emails()
+        except Exception as e:
+            print(f'Error getting emails: {e}')
+            return []
+
+    async def get_hostnames(self):
+        try:
+            rawres = myparser.Parser(self.total_results, self.word)
+            return await rawres.hostnames()
+        except Exception as e:
+            print(f'Error getting hostnames: {e}')
+            return []
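
A minimal standalone driver sketch (not part of the commit) for exercising the new module, assuming a configured bitbucket API key; 'username/repo' is a hypothetical target:

    import asyncio

    from theHarvester.discovery import bitbucket


    async def main() -> None:
        # The module's comment says the search word must contain username and repo.
        engine = bitbucket.SearchBitBucketCode('username/repo', limit=100)
        await engine.process(proxy=False)  # paginates until the limit or the last page
        print(await engine.get_emails())
        print(await engine.get_hostnames())


    asyncio.run(main())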

theHarvester/lib/core.py

Lines changed: 4 additions & 0 deletions
@@ -62,6 +62,10 @@ def bevigil_key() -> str:
     def bing_key() -> str:
         return Core.api_keys()['bing']['key']
 
+    @staticmethod
+    def bitbucket_key() -> str:
+        return Core.api_keys()['bitbucket']['key']
+
     @staticmethod
     def brave_key() -> str:
         return Core.api_keys()['brave']['key']
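
Like the other *_key() helpers, this reads from theHarvester's api-keys.yaml. A sketch of the expected entry, assuming the usual apikeys layout (the key value is a placeholder):

    apikeys:
      bitbucket:
        key: <your-bitbucket-api-key>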
