Skip to content

Commit 52c5b84

Browse files
committed
Fix dupe renaming and add additional test for warc.gz
1 parent ee15a3e commit 52c5b84

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

pywb/manager/manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import re
88
import gzip
99
import six
10+
import pathlib
1011

1112
from distutils.util import strtobool
1213
from pkg_resources import resource_string, get_distribution
@@ -149,8 +150,11 @@ def add_archives(self, archives, unpack_wacz=False):
149150

150151
def _rename_warc(self, warc_basename):
151152
dupe_idx = 1
153+
ext = ''.join(pathlib.Path(warc_basename).suffixes)
154+
pre_ext_name = warc_basename.split(ext)[0]
155+
152156
while True:
153-
new_basename = f'{warc_basename}-{dupe_idx}'
157+
new_basename = f'{pre_ext_name}-{dupe_idx}{ext}'
154158
if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
155159
break
156160
dupe_idx += 1

tests/test_manager.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,21 @@ def test_add_valid_archives_unpack_wacz(self, tmp_path):
6565
assert archive in os.listdir(manager.archive_dir)
6666
assert archive in index_text
6767

68+
def test_add_valid_archives_dupe_name(self, tmp_path):
69+
manager = self.get_test_collections_manager(tmp_path)
70+
warc_filename = 'sample_archive/warcs/example.warc.gz'
71+
manager.add_archives(warc_filename)
72+
manager.add_archives(warc_filename)
73+
74+
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
75+
index_text = f.read()
76+
77+
expected_archives = ('example.warc.gz', 'example-1.warc.gz')
78+
79+
for archive in expected_archives:
80+
assert archive in os.listdir(manager.archive_dir)
81+
assert archive in index_text
82+
6883
def test_add_valid_archives_dont_unpack_wacz(self, tmp_path):
6984
manager = self.get_test_collections_manager(tmp_path)
7085
archives = ['sample_archive/warcs/example.arc', 'sample_archive/warcs/example.arc.gz',

0 commit comments

Comments
 (0)