import asyncio
import random
import urllib.parse as urlparse
from typing import Any, NamedTuple

import aiohttp

from theHarvester.discovery.constants import MissingKey, get_delay
from theHarvester.lib.core import Core
from theHarvester.parsers import myparser


class RetryResult(NamedTuple):
    time: float


class SuccessResult(NamedTuple):
    fragments: list[str]
    next_page: int
    last_page: int


class ErrorResult(NamedTuple):
    status_code: int
    body: Any


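# handle_response() maps every API reply onto exactly one of the result types
# above: SuccessResult carries the extracted fragments plus pagination state,
# RetryResult a back-off delay, and ErrorResult the failing status and body,
# so process() can branch on plain data instead of raw responses.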
class SearchBitBucketCode:
    def __init__(self, word, limit) -> None:
        try:
            self.word = word
            self.total_results = ''
            self.server = 'api.bitbucket.org'
            self.limit = limit
            self.counter = 0
            self.page = 1
            self.key = Core.bitbucket_key()
            if self.key is None:
                raise MissingKey('BitBucket')
            self.proxy = False
            # word must contain both the username/workspace and the repository name
            self.base_url = f'https://{self.server}/2.0/repositories/{self.word}/src'
            self.headers = {
                'Host': self.server,
                'User-agent': Core.get_user_agent(),
                'Authorization': f'token {self.key}',
            }
            # Retry control to avoid infinite loops on rate limiting
            self.retry_count = 0
            self.max_retries = 3
        except Exception as e:
            print(f'Error initializing SearchBitBucketCode: {e}')
            raise

    @staticmethod
    async def fragments_from_response(json_data: dict) -> list[str]:
        # Collect every non-empty 'fragment' from the payload's
        # 'items' -> 'text_matches' entries.
        try:
            return [
                match['fragment']
                for item in json_data.get('items', [])
                for match in item.get('text_matches', [])
                if match.get('fragment') is not None
            ]
        except Exception as e:
            print(f'Error extracting fragments: {e}')
            return []

    @staticmethod
    async def page_from_response(page: str, links) -> int | None:
        # Pull the page number out of the 'next'/'last' pagination link, if present.
        try:
            if page_link := links.get(page):
                parsed = urlparse.urlparse(str(page_link.get('url')))
                if page_param := urlparse.parse_qs(parsed.query).get('page', [None])[0]:
                    return int(page_param)
            return 0
        except Exception as e:
            print(f'Error parsing page response: {e}')
            return None

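    # Convert a raw (text, json, status, links) tuple from do_search() into
    # one of the three result types declared at module level.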
    async def handle_response(self, response: tuple[str, dict, int, Any]) -> ErrorResult | RetryResult | SuccessResult:
        try:
            text, json_data, status, links = response
            if status == 200:
                results = await self.fragments_from_response(json_data)
                # Ensure next_page and last_page default to 0 if None
                next_page = await self.page_from_response('next', links) or 0
                last_page = await self.page_from_response('last', links) or 0
                return SuccessResult(results, next_page, last_page)
            if status in (429, 403):
                return RetryResult(60)
            return ErrorResult(status, json_data if isinstance(json_data, dict) else text)
        except Exception as e:
            print(f'Error handling response: {e}')
            return ErrorResult(500, str(e))

    @staticmethod
    async def next_page_or_end(result: SuccessResult) -> int:
        # page_from_response() coerces a missing page to 0 (never None), so
        # fall back to last_page only when there is no next page to fetch.
        return result.next_page or result.last_page

    async def do_search(self, page: int) -> tuple[str, dict, int, Any]:
        try:
            # base_url carries no query string yet, so the first parameter is
            # appended with '?' rather than '&'
            url = f'{self.base_url}?page={page}' if page else self.base_url
            async with aiohttp.ClientSession(headers=self.headers) as sess:
                async with sess.get(url, proxy=random.choice(Core.proxy_list()) if self.proxy else None) as resp:
                    return await resp.text(), await resp.json(), resp.status, resp.links
        except Exception as e:
            print(f'Error performing search: {e}')
            return '', {}, 500, {}

    async def process(self, proxy: bool = False) -> None:
        try:
            self.proxy = proxy
            while self.counter <= self.limit and self.page != 0:
                try:
                    api_response = await self.do_search(self.page)
                    result = await self.handle_response(api_response)

                    if isinstance(result, SuccessResult):
                        # Reset retry counter on any successful response
                        self.retry_count = 0
                        print(f'\tCollected {self.counter} results so far.')
                        self.total_results += ''.join(result.fragments)
                        self.counter += len(result.fragments)
                        next_or_last = result.next_page or result.last_page
                        # Break if pagination does not advance to avoid infinite loop
                        if next_or_last == self.page:
                            print('\tNo page advancement detected; exiting to avoid infinite loop.')
                            self.page = 0
                            break
                        self.page = next_or_last
                        await asyncio.sleep(get_delay())
                    elif isinstance(result, RetryResult):
                        self.retry_count += 1
                        if self.retry_count > self.max_retries:
                            print('\tMaximum retries reached; exiting to avoid infinite loop.')
                            self.page = 0
                            break
                        sleepy_time = get_delay() + result.time
                        print(f'\tRetrying page in {sleepy_time} seconds...')
                        await asyncio.sleep(sleepy_time)
                    else:
                        # On error, stop to avoid endless retries on a bad state
                        print(f'\tException occurred: status_code: {result.status_code} reason: {result.body}')
                        self.page = 0
                        break
                except Exception as e:
                    print(f'Error processing page: {e}')
                    await asyncio.sleep(get_delay())
        except Exception as e:
            print(f'An exception has occurred in bitbucketcode process: {e}')

    async def get_emails(self):
        try:
            rawres = myparser.Parser(self.total_results, self.word)
            return await rawres.emails()
        except Exception as e:
            print(f'Error getting emails: {e}')
            return []

    async def get_hostnames(self):
        try:
            rawres = myparser.Parser(self.total_results, self.word)
            return await rawres.hostnames()
        except Exception as e:
            print(f'Error getting hostnames: {e}')
            return []
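

# Minimal usage sketch (an assumption for illustration, not part of
# theHarvester's CLI): 'workspace/repo' is a hypothetical workspace and
# repository pair, and Core.bitbucket_key() must find a configured key.
if __name__ == '__main__':

    async def _demo() -> None:
        search = SearchBitBucketCode('workspace/repo', limit=100)
        await search.process(proxy=False)
        print('Emails:', await search.get_emails())
        print('Hostnames:', await search.get_hostnames())

    asyncio.run(_demo())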