Skip to content

Commit 4b9a2b1

Browse files
committed
add messages preloading
move media downloading to another class and another async task
1 parent 4be3f99 commit 4b9a2b1

File tree

8 files changed

+235
-54
lines changed

8 files changed

+235
-54
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "t-export"
3-
version = "0.1.0"
3+
version = "0.1.1"
44
description = "Telegram chats export tool."
55
authors = ["RuslanUC <dev_ruslan_uc@protonmail.com>"]
66
readme = "README.md"

texport/export_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class ExportConfig:
3131
from_date: datetime = datetime(1970, 1, 1)
3232
to_date: datetime = datetime.now()
3333
print: bool = False
34+
preload: bool = False
3435

3536
def excluded_media(self) -> set[MessageMediaType]:
3637
result = set()

texport/exporter.py

Lines changed: 38 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,29 @@
11
import asyncio
22
from datetime import date
3-
from os.path import relpath
4-
from typing import Union, Optional
3+
from typing import Union
54

65
from pyrogram import Client
76
from pyrogram.types import Message as PyroMessage
87
from pyrogram.utils import zero_datetime
98

10-
from texport.export_config import ExportConfig
11-
from texport.media import MEDIA_TYPES
12-
from texport.messages_saver import MessagesSaver
13-
from texport.progress_print import ProgressPrint
14-
15-
16-
class ExportStatus:
17-
def __init__(self):
18-
self.approx_messages_count = None
19-
self.last_message_id = None
20-
self.last_date = None
9+
from .export_config import ExportConfig
10+
from .media import MEDIA_TYPES
11+
from .media_downloader import MediaExporter
12+
from .messages_preloader import Preloader
13+
from .messages_saver import MessagesSaver
14+
from .progress_print import ProgressPrint
2115

2216

2317
class Exporter:
2418
def __init__(self, client: Client, export_config: ExportConfig=None):
2519
self._config = export_config or ExportConfig()
2620
self._client = client
2721
self._task = None
28-
self.status: Optional[ExportStatus] = None
29-
self._progress: ProgressPrint = ProgressPrint(disabled=not self._config.print)
22+
self.progress: ProgressPrint = ProgressPrint(disabled=not self._config.print)
3023
self._messages: list[PyroMessage] = []
3124
self._media: dict[Union[int, str], str] = {}
3225
self._saver = MessagesSaver(self._messages, self._media, export_config)
26+
self._media_downloader = MediaExporter(client, export_config, self._media, self.progress)
3327
self._excluded_media = self._config.excluded_media()
3428

3529
async def _export_media(self, message: PyroMessage) -> None:
@@ -40,57 +34,59 @@ async def _export_media(self, message: PyroMessage) -> None:
4034
if media.file_size > self._config.size_limit * 1024 * 1024:
4135
return
4236

43-
path = await message.download(file_name=f"{self._config.output_dir.absolute()}/{m.dir_name}/")
44-
path = relpath(path, self._config.output_dir.absolute())
45-
self._media[message.id] = path
37+
self._media_downloader.add(media.file_id, f"{self._config.output_dir.absolute()}/{m.dir_name}/", message.id)
4638

4739
if hasattr(media, "thumbs") and media.thumbs:
48-
path = await self._client.download_media(media.thumbs[0].file_id,
49-
file_name=f"{self._config.output_dir.absolute()}/thumbs/")
50-
path = relpath(path, self._config.output_dir.absolute())
51-
self._media[f"{message.id}_thumb"] = path
40+
self._media_downloader.add(media.thumbs[0].file_id, f"{self._config.output_dir.absolute()}/thumbs/",
41+
f"{message.id}_thumb")
42+
43+
async def _write(self, wait_media: list[int]) -> None:
44+
self.progress.status = "Waiting for all media to be downloaded..."
45+
await self._media_downloader.wait(wait_media)
46+
self.progress.status = "Writing messages to file..."
47+
await self._saver.save()
5248

5349
async def _export(self, chat_id: Union[int, str]):
50+
await self._media_downloader.run()
51+
5452
offset_date = zero_datetime() if self._config.to_date.date() >= date.today() else self._config.to_date
5553
loaded = 0
56-
self._progress.approx_messages_count = await self._client.get_chat_history_count(chat_id)
57-
async for message in self._client.get_chat_history(chat_id, offset_date=offset_date):
54+
medias = []
55+
self.progress.approx_messages_count = await self._client.get_chat_history_count(chat_id)
56+
messages_iter = Preloader(self._client, self.progress, self._export_media) \
57+
if self._config.preload else self._client.get_chat_history
58+
async for message in messages_iter(chat_id, offset_date=offset_date):
5859
if message.date < self._config.from_date:
5960
break
6061

6162
loaded += 1
62-
with self._progress.update():
63-
self._progress.status = "Exporting messages..."
64-
self._progress.messages_exported = loaded
65-
66-
if self.status.approx_messages_count is None:
67-
self.status.approx_messages_count = message.id
68-
self.status.last_message_id = message.id
69-
self.status.last_date = message.date
63+
with self.progress.update():
64+
self.progress.status = "Exporting messages..."
65+
self.progress.messages_exported = loaded
7066

7167
if message.media:
72-
self._progress.status = "Downloading media..."
68+
medias.append(message.id)
69+
medias.append(f"{message.id}_thumb")
7370
await self._export_media(message)
7471

7572
if not message.text and not message.caption and message.id not in self._media:
7673
continue
7774

7875
self._messages.append(message)
79-
if len(self._messages) > 5000:
80-
self._progress.status = "Writing messages to file..."
81-
await self._saver.save()
76+
if len(self._messages) > 1000:
77+
await self._write(medias)
8278

8379
if self._messages:
84-
self._progress.status = "Writing messages to file..."
85-
await self._saver.save()
86-
self.status = self._task = None
80+
await self._write(medias)
81+
self._task = None
8782

88-
self._progress.status = "Done!"
83+
self.progress.status = "Stopping media downloader..."
84+
await self._media_downloader.stop()
85+
self.progress.status = "Done!"
8986

9087
async def export(self, block: bool=True) -> None:
91-
if self._task is not None or self.status is not None:
88+
if self._task is not None:
9289
return
93-
self.status = ExportStatus()
9490
coro = self._export(self._config.chat_id)
9591
if block:
9692
await coro

texport/main.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ async def _main(session_name: str, api_id: int, api_hash: str, config: ExportCon
1717
async with Client(f"{Path.home()}/.texport/{session_name}", api_id=api_id, api_hash=api_hash) as client:
1818
exporter = Exporter(client, config)
1919
await exporter.export()
20-
print("Export complete!")
20+
if config.print:
21+
print("Export complete!")
2122

2223

2324
@click.command()
@@ -44,11 +45,12 @@ async def _main(session_name: str, api_id: int, api_hash: str, config: ExportCon
4445
@click.option("--stickers/--no-stickers", default=True, help="Download stickers or not.")
4546
@click.option("--gifs/--no-gifs", default=True, help="Download gifs or not.")
4647
@click.option("--documents/--no-documents", default=True, help="Download documents or not.")
47-
@click.option("--quiet", default=False, help="Do not print progress to console.")
48+
@click.option("--quiet", "-q", is_flag=True, default=False, help="Do not print progress to console.")
49+
@click.option("--no-preload", is_flag=True, default=False, help="Do not preload all messages.")
4850
def main(
4951
session_name: str, api_id: int, api_hash: str, chat_id: str, output: str, size_limit: int, from_date: str,
5052
to_date: str, photos: bool, videos: bool, voice: bool, video_notes: bool, stickers: bool, gifs: bool,
51-
documents: bool, quiet: bool,
53+
documents: bool, quiet: bool, no_preload: bool,
5254
) -> None:
5355
home = Path.home()
5456
texport_dir = home / ".texport"
@@ -69,6 +71,7 @@ def main(
6971
export_gifs=gifs,
7072
export_files=documents,
7173
print=not quiet,
74+
preload=not no_preload,
7275
)
7376

7477
if session_name.endswith(".session"):

texport/media_downloader.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import asyncio
2+
from asyncio import sleep
3+
from os.path import relpath
4+
from typing import Union, Optional
5+
6+
from pyrogram import Client
7+
from pyrogram.errors import RPCError
8+
9+
from .export_config import ExportConfig
10+
from .progress_print import ProgressPrint
11+
12+
13+
class MediaExporter:
14+
def __init__(self, client: Client, config: ExportConfig, media_dict: dict, progress: ProgressPrint):
15+
self.client = client
16+
self.config = config
17+
self.output = media_dict
18+
self.task = None
19+
self.queue: list[tuple[str, str, Union[str, int]]] = []
20+
self.ids: set[Union[str, int]] = set()
21+
self.all_ids: set[Union[str, int]] = set()
22+
self.progress = progress
23+
24+
self._running = False
25+
26+
def add(self, file_id: str, download_dir: str, out_id: Union[str, int]) -> None:
27+
if out_id in self.all_ids: return
28+
self.queue.append((file_id, download_dir, out_id))
29+
self.ids.add(out_id)
30+
self.all_ids.add(out_id)
31+
self._status()
32+
33+
async def _download(self, file_id: str, download_dir: str, out_id: Union[str, int]) -> None:
34+
try:
35+
path = await self.client.download_media(file_id, file_name=download_dir)
36+
except RPCError:
37+
return
38+
path = relpath(path, self.config.output_dir.absolute())
39+
self.output[out_id] = path
40+
41+
def _status(self, status: str=None) -> None:
42+
with self.progress.update():
43+
self.progress.media_status = status or self.progress.media_status
44+
self.progress.media_queue = len(self.queue)
45+
46+
async def _task(self) -> None:
47+
while self._running:
48+
if not self.queue:
49+
self._status("Idle...")
50+
await sleep(.1)
51+
continue
52+
self._status("Downloading...")
53+
await self._download(*self.queue[0])
54+
_, _, task_id = self.queue.pop(0)
55+
self.ids.discard(task_id)
56+
57+
self._status("Stopped...")
58+
59+
async def run(self) -> None:
60+
self._running = True
61+
self.task = asyncio.get_event_loop().create_task(self._task())
62+
63+
async def stop(self) -> None:
64+
await self.wait()
65+
self._running = False
66+
67+
async def wait(self, messages: Optional[list[int]]=None) -> None:
68+
messages = set(messages) if messages is not None else None
69+
while self._running and self.queue:
70+
if messages is not None and not messages.intersection(self.ids):
71+
break
72+
await sleep(.1)

texport/messages_preloader.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from asyncio import sleep, get_event_loop
2+
3+
from pyrogram import Client
4+
from pyrogram.types import Message as PyroMessage
5+
6+
from .progress_print import ProgressPrint
7+
8+
9+
class Preloader:
    """Async iterator that loads chat history in a background task.

    Call the instance with the arguments intended for
    ``Client.get_chat_history`` and iterate over the result with
    ``async for``. The first ``__anext__`` call starts a task that appends
    messages to an internal buffer; iteration then yields them in the
    order they were received.
    """

    def __init__(self, client: Client, progress: ProgressPrint, media_cb):
        self.client = client
        self.progress = progress
        # Set once the background task has stopped producing messages.
        self.finished = False
        # Buffer of messages that were loaded but not yet yielded.
        self.messages: list[PyroMessage] = []
        self.messages_loaded = 0
        # Optional coroutine function awaited with every message that has media.
        self.media_cb = media_cb

        self._task = None
        self._pyro_args = ()
        self._pyro_kwargs = {}

    def __call__(self, *pyrogram_args, **pyrogram_kwargs):
        """Remember the ``get_chat_history`` arguments and return ``self``."""
        self._pyro_args = pyrogram_args
        self._pyro_kwargs = pyrogram_kwargs
        return self

    def __aiter__(self):
        return self

    async def _preload(self) -> None:
        """Fill the buffer from ``get_chat_history`` and report progress."""
        try:
            async for message in self.client.get_chat_history(*self._pyro_args, **self._pyro_kwargs):
                self.messages.append(message)
                self.messages_loaded += 1

                if message.media and self.media_cb:
                    await self.media_cb(message)

                with self.progress.update():
                    self.progress.status = "Preloading messages and media..."
                    self.progress.messages_loaded = self.messages_loaded
        finally:
            # Mark completion on failure too; otherwise __anext__ would keep
            # polling forever after the task has died.
            self.finished = True

    async def __anext__(self) -> PyroMessage:
        """Return the next buffered message, waiting for the loader if needed.

        Raises ``StopAsyncIteration`` when the history is exhausted. If the
        background task failed, its exception is re-raised after all
        buffered messages have been yielded.
        """
        if self._task is None:
            self._task = get_event_loop().create_task(self._preload())

        while not self.finished and not self.messages:
            await sleep(.01)

        if self.messages:
            return self.messages.pop(0)

        # Buffer is empty and the loader has stopped: surface its failure, if any.
        task = self._task
        if task.done() and not task.cancelled():
            error = task.exception()
            if error is not None:
                raise error
        raise StopAsyncIteration
54+

texport/messages_saver.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ def _save(self) -> None:
4141
output = Export(prev.chat.first_name, output).to_html()
4242
with open(f"{out_dir}/messages{self.part}.html", "w", encoding="utf8") as f:
4343
f.write(output)
44+
45+
self.part += 1
4446

4547
async def save(self) -> None:
4648
loop = get_running_loop()

0 commit comments

Comments
 (0)