|
| 1 | +import re |
| 2 | +import asyncio |
| 3 | +from typing import List, Tuple |
| 4 | +from urllib.parse import quote_plus |
| 5 | + |
| 6 | +import httpx |
| 7 | +from selectolax.parser import HTMLParser |
| 8 | + |
| 9 | +from torrra.indexers.base import BaseIndexer |
| 10 | +from torrra.types import Torrent |
| 11 | + |
| 12 | + |
class Indexer(BaseIndexer):
    """Indexer that scrapes 5movierulz search results and their magnet links.

    ``search`` loads the search page through ``self._get_parser``, keeps the
    entries whose title contains the query, then downloads every matching
    detail page concurrently and collects the magnet links found on each.
    """

    BASE_URL = ""

    # Size tokens such as "1.4gb", "700 MB" or "350Kb". Case-insensitive so
    # that already upper-cased units are normalised the same way as
    # lower-cased ones; compiled once instead of once per link.
    _SIZE_RE = re.compile(r"\b(\d+(\.\d+)?)\s*(gb|mb|kb)\b", re.IGNORECASE)

    def search(self, query: str) -> List[Torrent]:
        """Return the torrents whose listing title contains ``query``.

        Args:
            query: Free-text search string; it is URL-encoded before use and
                matched case-insensitively against each listing title.

        Returns:
            One ``Torrent`` per magnet link found on the matching detail
            pages, titled ``"<listing title> <link label>"``. Empty when the
            site reports no results or no listing title contains ``query``.
        """
        normalized_query = quote_plus(query)
        url = f"https://www.5movierulz.voto/search_movies?s={normalized_query}"
        # NOTE(review): _get_parser is not defined in this class; presumably
        # inherited from BaseIndexer and returning a selectolax parser.
        parser = self._get_parser(url)

        results: List[Torrent] = []

        # An <h1> inside the result list is treated as the "no results" marker.
        if parser.css_first("div.content ul h1"):
            return results

        needle = query.lower()
        titles_links: List[Tuple[str, str]] = []

        for node in parser.css("div.content ul li"):
            title_node = node.css_first("p b")
            link_node = node.css_first("a")
            title = title_node.text() if title_node else ""
            link = link_node.attributes.get("href") if link_node else ""

            # Keep only entries that mention the query and have a usable link.
            if needle in title.lower() and link:
                titles_links.append((title, link))

        # Nothing matched: skip starting an event loop and an HTTP client.
        if not titles_links:
            return results

        magnets_list = asyncio.run(self._fetch_magnet_uris(titles_links))

        # magnets_list is ordered like titles_links (asyncio.gather keeps the
        # order of its arguments), so the two can be paired positionally.
        for (title, _), magnets in zip(titles_links, magnets_list):
            for magnet in magnets:
                results.append(
                    Torrent(
                        title=f"{title} {magnet.title}",
                        magnet_uri=magnet.magnet_uri,
                    )
                )

        return results

    async def _fetch_magnet_uris(
        self, items: List[Tuple[str, str]]
    ) -> List[List[Torrent]]:
        """Fetch every detail page concurrently and extract its magnet links.

        Args:
            items: ``(title, url)`` pairs; only the URL is used here.

        Returns:
            One list per input item, in input order. Each inner list holds a
            ``Torrent`` for every "GET THIS TORRENT" link on that page, with
            the link's ``<small>`` label (size units normalised) as its title.
        """

        async def fetch(client: httpx.AsyncClient, url: str) -> List[Torrent]:
            res = await client.get(url, timeout=10)
            page = HTMLParser(res.text)

            found: List[Torrent] = []

            for anchor in page.css("div.entry-content p a"):
                # Only the download anchors carry this label.
                if "GET THIS TORRENT" not in anchor.text(strip=True):
                    continue

                magnet_uri = anchor.attributes.get("href")
                if not magnet_uri:
                    continue

                label_node = anchor.css_first("small")
                label = label_node.text(strip=True) if label_node else ""
                # Rewrite e.g. "1.4gb" as "1.4 GB".
                formatted_title = self._SIZE_RE.sub(
                    lambda m: f"{m.group(1)} {m.group(3).upper()}", label
                )

                found.append(Torrent(title=formatted_title, magnet_uri=magnet_uri))

            return found

        # No pages requested: avoid opening a client for nothing.
        if not items:
            return []

        async with httpx.AsyncClient() as client:
            tasks = [fetch(client, url) for (_, url) in items]
            return list(await asyncio.gather(*tasks))
0 commit comments