Skip to content

Commit 5a9c56d

Browse files
authored
Merge pull request #42 from pganssle/segmenter_fixes
Segmenter fixes
2 parents 9a1c5ea + a4c971d commit 5a9c56d

File tree

3 files changed

+144
-5
lines changed

3 files changed

+144
-5
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
Version 0.6.2
2+
=============
3+
4+
- Fixed an issue where some feeds would fail to merge files with an apostrophe in their paths.
5+
6+
- Fixed an issue where chapters taken from the beginning of a file would accidentally cause the entire file to be merged into a segment.
7+
18
Version 0.6.1
29
=============
310

src/audio_feeder/m4btools.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def _merge_file_infos(
123123

124124
def _to_file_list_entry(p: Path, s: Optional[float] = None, e: Optional[float] = None):
125125
pathstr = os.fspath(p)
126-
pathstr = pathstr.replace("'", r"\'")
126+
pathstr = pathstr.replace("'", r"'\''")
127127
o = f"file '{pathstr}'\n"
128128
if s is not None:
129129
o += f"inpoint {s:0.3f}\n"
@@ -448,12 +448,19 @@ def segment_files_jobs(
448448
job_queue: MutableSequence[RenderJob] = []
449449
padding_format = _zero_padding_format(len(segmented))
450450

451+
def _get_original_duration(segment: SegmentableFiles) -> float:
452+
# Need to pull it out of file_infos because we change the duration
453+
# of the FileInfo in SegmentableFiles so that it can be more
454+
# easily merged.
455+
duration = file_infos[segment.fpath].format_info.duration
456+
assert duration is not None
457+
return duration
458+
451459
ext = files[0].suffix
452460
for i, segment in enumerate(segmented):
453461
out_file = out_path / f"Part{format(i, padding_format)}{ext}"
454462
if len(segment) == 1:
455-
duration = segment[0].file_info.format_info.duration
456-
assert duration is not None
463+
duration = _get_original_duration(segment[0])
457464
chapter = segment[0].chapter
458465
if duration > chapter.duration:
459466
subset = FileSubset(
@@ -473,8 +480,7 @@ def segment_files_jobs(
473480

474481
subsets: MutableSequence[FileSubset] = []
475482
for element in segment:
476-
duration = element.file_info.format_info.duration
477-
assert duration is not None
483+
duration = _get_original_duration(element)
478484
if abs(element.chapter.end_time - duration) < 0.25:
479485
end_time = None
480486
else:

tests/test_m4btools.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,3 +730,129 @@ def test_segmenter_split_single_file(
730730
assert actual_chapter.end_time == pytest.approx(
731731
expected_chapter.end_time, abs=0.25
732732
)
733+
734+
735+
@pytest.mark.parametrize("base_name", ("John Jones's Dollar", r"John Jones\' Dollar"))
736+
def test_segmenter_weird_names(tmp_path: pathlib.Path, base_name: str) -> None:
737+
format_info_base = file_probe.FormatInfo(
738+
format_name="mp3",
739+
format_long_name="MP2/3 (MPEG audio layer 2/3)",
740+
start_time=0.0,
741+
tags={
742+
"title": "John Jones's Dollar",
743+
"artist": "Harry Stephen Keeler",
744+
},
745+
)
746+
747+
file_infos = [
748+
file_probe.FileInfo(
749+
format_info=attrs.evolve(
750+
format_info_base,
751+
filename=f"{base_name}-Part00.mp3",
752+
duration=60.0,
753+
),
754+
chapters=[
755+
file_probe.ChapterInfo(
756+
num=0,
757+
title="Chapter 01",
758+
start_time=0.0,
759+
end_time=60.0,
760+
)
761+
],
762+
),
763+
file_probe.FileInfo(
764+
format_info=attrs.evolve(
765+
format_info_base,
766+
filename=f"{base_name}-Part01.mp3",
767+
duration=75.0,
768+
),
769+
chapters=[
770+
file_probe.ChapterInfo(
771+
num=1,
772+
title="Chapter 02",
773+
start_time=0.0,
774+
end_time=60.0,
775+
),
776+
file_probe.ChapterInfo(
777+
num=2,
778+
title="Chapter 03",
779+
start_time=60.0,
780+
end_time=75.0,
781+
),
782+
],
783+
),
784+
file_probe.FileInfo(
785+
format_info=attrs.evolve(
786+
format_info_base,
787+
filename=f"{base_name}-Part02.mp3",
788+
duration=105.0,
789+
),
790+
chapters=[
791+
file_probe.ChapterInfo(
792+
num=3,
793+
title="Chapter 04",
794+
start_time=0.0,
795+
end_time=45.0,
796+
),
797+
file_probe.ChapterInfo(
798+
num=4, title="Chapter 05", start_time=45.0, end_time=105.0
799+
),
800+
],
801+
),
802+
file_probe.FileInfo(
803+
format_info=attrs.evolve(
804+
format_info_base,
805+
filename=f"{base_name}-Part03.mp3",
806+
duration=120.0,
807+
),
808+
chapters=[
809+
file_probe.ChapterInfo(
810+
num=5,
811+
title="Chapter 06",
812+
start_time=0.0,
813+
end_time=60.0,
814+
),
815+
file_probe.ChapterInfo(
816+
num=6, title="Chapter 07", start_time=60.0, end_time=120.0
817+
),
818+
],
819+
),
820+
]
821+
822+
in_path = tmp_path / "in_path"
823+
in_path.mkdir()
824+
825+
out_path = tmp_path / "out_path"
826+
out_path.mkdir()
827+
828+
for fi in file_infos:
829+
utils.make_file(fi, in_path / fi.format_info.filename)
830+
831+
loader = dp.AudiobookLoader()
832+
files = loader.audio_files(in_path)
833+
jobs = m4btools.segment_files_jobs(
834+
files, out_path, cost_func=segmenter.asymmetric_cost(60.0)
835+
)
836+
837+
m4btools.render_jobs(jobs)
838+
839+
file_infos = [file_probe.FileInfo.from_file(p) for p in sorted(out_path.iterdir())]
840+
841+
expected_chapter_titles = [
842+
("Chapter 01",),
843+
("Chapter 02",),
844+
(
845+
"Chapter 03",
846+
"Chapter 04",
847+
),
848+
("Chapter 05",),
849+
("Chapter 06",),
850+
("Chapter 07",),
851+
]
852+
853+
assert len(file_infos) == len(expected_chapter_titles)
854+
for file_info, expected_chapters in zip(file_infos, expected_chapter_titles):
855+
assert file_info.format_info.duration == pytest.approx(60.0, abs=0.5)
856+
assert file_info.chapters and len(file_info.chapters) == len(expected_chapters)
857+
for chapter, expected_title in zip(file_info.chapters, expected_chapters):
858+
assert chapter.title == expected_title

0 commit comments

Comments
 (0)