Skip to content

Commit a51e841

Browse files
committed
fix: wrong chapter order, regression from a2daaf4
Chapters will be in random order due to human_sort() being called on the whole index in convert_pages. The key for sorting is the absolute path of each page, which features the random UUID of each Archive. Learning time: random UUIDs are random. Only sort pages when extracting instead, since the culprit appears to be Windows' ZLIB being non-standard (Windows moment).
1 parent 1856da2 commit a51e841

File tree

2 files changed

+34
-47
lines changed

2 files changed

+34
-47
lines changed

src/reCBZ/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
11
import re
2-
import pathlib
2+
import tempfile
3+
from pathlib import Path
34
from uuid import uuid4
45

56
__version__ = "0.7.3"
67
CMDNAME = 'recbz'
78

8-
MODULE_PATH = pathlib.Path(__file__).resolve().parent
9+
MODULE_PATH = Path(__file__).resolve().parent
910

1011
# whether to print the 'recbz.py' title at the beginning
1112
SHOWTITLE = True
1213

1314
# global UUID for files stored in temp, so we can ensure multiple instances
1415
# created under the same process don't delete cache currently used by another
15-
TEMPUUID = str(uuid4().hex)
16+
CACHE_PREFIX:str = f'reCBZCACHE_'
17+
GLOBAL_CACHEDIR = Path(tempfile.gettempdir()) / f'{CACHE_PREFIX}{str(uuid4().hex)}'
18+
if not GLOBAL_CACHEDIR.exists():
19+
GLOBAL_CACHEDIR.mkdir()
1620

1721
IMG_FILES = re.compile('^.*\\.(?!png\\b|webp\\b|jpg\\b|jpeg\\b)\\w*$')
1822
EPUB_FILES = re.compile('^.*(calibre_bookmarks.txt)$|^.*(mimetype)$|.*\\.(?=css\\b|opf\\b|ncx\\b|xhtml\\b|xml\\b)\\w*$')

src/reCBZ/archive.py

Lines changed: 27 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#!/usr/bin/env python3
22
# -*- coding: utf-8 -*-
3+
import re
34
import time
4-
import tempfile
55
import shutil
6+
import tempfile
67
from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED, BadZipFile
78
from functools import partial
89
from pathlib import Path
@@ -20,8 +21,6 @@
2021

2122
VALID_BOOK_FORMATS:tuple = ('cbz', 'zip', 'epub', 'mobi')
2223
SOURCE_NAME:str = 'Source'
23-
CACHE_PREFIX:str = 'reCBZCACHE_'
24-
global_cache_id:str = f'{CACHE_PREFIX}{reCBZ.TEMPUUID}_'
2524
chapter_prefix:str = 'v' # :) :D C:
2625

2726

@@ -30,23 +29,12 @@ def write_zip(savepath, chapters):
3029
lead_zeroes = len(str(len(chapters)))
3130
for i, chapter in enumerate(chapters):
3231
for page in chapter:
33-
# what we're essentially trying to achieve here is determine a page's
34-
# path relative to its *local* cachedir, which varies by class
35-
# instance (so they don't mix) but is constant to each process
36-
temp = Path(tempfile.gettempdir())
37-
proc_cache = temp.glob(f'{global_cache_id}*')
38-
local_parent_dir = None
39-
for path in proc_cache:
40-
if page.fp.is_relative_to(path):
41-
local_parent_dir = path
42-
if local_parent_dir is None:
43-
raise OSError(f'{page.fp} not in any subpath of process cache')
44-
45-
dest = ''
32+
4633
if len(chapters) > 1: # no parent if there's only one chapter
47-
dest += f'{chapter_prefix}{i+1:0{lead_zeroes}d}/'
48-
dest += f'{page.fp.relative_to(local_parent_dir)}'
49-
dest = Path(dest)
34+
dest = Path(f'{chapter_prefix}{i+1:0{lead_zeroes}d}') / page.rel_path
35+
else:
36+
dest = Path(page.rel_path)
37+
mylog(f"ZIP: write '{page.name}' to {dest}")
5038
if config.compress_zip:
5139
new_zip.write(page.fp, dest, ZIP_DEFLATED, 9)
5240
else:
@@ -170,9 +158,16 @@ def convert_page_worker(source, options, savedir=None):
170158
class Page():
171159
def __init__(self, file_name):
172160
self.fp = Path(file_name)
161+
# i tried for hours but windows can't correctly pickle the
162+
# GLOBAL_CACHEDIR, it's not thread safe for whatever reason. some
163+
# instances will init with a new UUID which can't be compared.
164+
# this is the least hacky way I could come up with to keep Unix parity
165+
uuid_part = [part for part in self.fp.parts if reCBZ.CACHE_PREFIX in part]
166+
global_cache = Path(tempfile.gettempdir()) / uuid_part[0]
167+
local_cache = global_cache / self.fp.relative_to(global_cache).parts[0]
168+
self.rel_path = self.fp.relative_to(local_cache)
173169
self.name = str(self.fp.name)
174170
self.stem = str(self.fp.stem)
175-
self._source_fp = self.fp
176171
self._img:Image.Image
177172
self._fmt = None
178173
self._closed = True
@@ -259,8 +254,8 @@ def __init__(self, filename:str):
259254
self._index:list = []
260255
self._chapter_lengths = []
261256
self._chapters = []
262-
self._cachedir:Path = Path('.')
263257
self._bad_files = []
258+
self._cachedir = Path(tempfile.mkdtemp(prefix='book_', dir=reCBZ.GLOBAL_CACHEDIR))
264259

265260
@property
266261
def bad_files(self):
@@ -283,16 +278,6 @@ def fetch_chapters(self):
283278
return chapters
284279

285280
def extract(self, count:int=0, raw:bool=False) -> tuple:
286-
# check and clean previous cache
287-
tempdir = Path(tempfile.gettempdir())
288-
prev_dirs = tempdir.glob(f'{CACHE_PREFIX}*')
289-
for path in prev_dirs:
290-
assert path != tempdir # for the love of god
291-
if not str(global_cache_id) in str(path):
292-
mylog(f'hex {global_cache_id} not in {path}, cleaning up')
293-
shutil.rmtree(path)
294-
295-
self._cachedir = Path(tempfile.mkdtemp(prefix=global_cache_id))
296281
try:
297282
source_zip = ZipFile(self.fp)
298283
except BadZipFile as err:
@@ -313,6 +298,9 @@ def extract(self, count:int=0, raw:bool=False) -> tuple:
313298

314299
# god bless you Georgy https://stackoverflow.com/a/50927977/
315300
raw_paths = tuple(filter(Path.is_file, Path(self._cachedir).rglob('*')))
301+
# solves the need to invert files in EPUB, where the destination can't
302+
# be inferred from the original filepath. critical, because files are
303+
# randomly ordered on Windows (probably due to the ZLIB implementation)
316304
sorted_paths = tuple(human_sort(raw_paths))
317305
sorted_pages = tuple(Page(path) for path in sorted_paths)
318306

@@ -355,14 +343,11 @@ def convert_pages(self, fmt=None, quality=None, grayscale=None, size=None) -> tu
355343
if grayscale is not None: options['grayscale'] = bool(grayscale)
356344
if size is not None: options['size'] = size
357345

358-
source_pages = self.fetch_pages()
359346
worker = partial(convert_page_worker, options=options)
360-
results = map_workers(worker, source_pages)
361-
# solves the need to invert files in EPUB. critical for windows, because
362-
# files are randomly ordered for whatever reason.
363-
sorted_paths = human_sort([item[1].fp for item in results if item[0]])
347+
results = map_workers(worker, self.fetch_pages())
348+
364349
self._bad_files = [item[1].fp for item in results if item[0] is False]
365-
self._index = [Page(path) for path in sorted_paths]
350+
self._index = [item[1] for item in results if item[0]]
366351
mylog('', progress=True)
367352
return tuple(self._index)
368353

@@ -448,12 +433,10 @@ def remove_page(self, index):
448433

449434
@classmethod
450435
def cleanup(cls):
451-
tempdir = Path(tempfile.gettempdir())
452-
prev_dirs = tempdir.glob(f'{CACHE_PREFIX}*')
453-
for path in prev_dirs:
454-
assert path != tempdir # for the love of god
455-
mylog(f'cleanup(): {path}]')
436+
g_cache = reCBZ.GLOBAL_CACHEDIR
437+
if g_cache.exists():
438+
mylog(f'cleanup(): {g_cache}]')
456439
try:
457-
shutil.rmtree(path)
440+
shutil.rmtree(g_cache)
458441
except PermissionError:
459-
mylog(f"PermissionError, couldn't clean {path}")
442+
mylog(f"PermissionError, couldn't clean {g_cache}")

0 commit comments

Comments
 (0)