Skip to content

Commit 1eb0f96

Browse files
committed
windows support work: fix loaders to use pathname2url to convert to
file:/// url, and use urlopen to open file paths; fix some tests to use universal newlines ('rU') mode
1 parent 06b2ea4 commit 1eb0f96

File tree

6 files changed

+31
-18
lines changed

6 files changed

+31
-18
lines changed

pywb/rewrite/rewrite_live.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
import datetime
77
import mimetypes
88
import logging
9+
import os
910

10-
from urlparse import urlsplit
11+
from urlparse import urlsplit, urljoin
12+
from urllib import pathname2url
1113

1214
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
1315
from pywb.utils.loaders import extract_client_cookie
@@ -180,16 +182,25 @@ def fetch_request(self, url, urlrewriter,
180182
if url.startswith('//'):
181183
url = 'http:' + url
182184

185+
if is_http(url):
186+
is_remote = True
187+
else:
188+
is_remote = False
189+
if not url.startswith('file:'):
190+
url = os.path.abspath(url)
191+
url = urljoin('file:', pathname2url(url))
192+
print(url)
193+
183194
# explicit urlkey may be passed in (say for testing)
184195
if not urlkey:
185196
urlkey = canonicalize(url)
186197

187-
if is_http(url):
198+
if is_remote:
188199
(status_headers, stream) = self.fetch_http(url, urlkey, env,
189200
req_headers,
190201
follow_redirects,
191202
ignore_proxies)
192-
else:
203+
else:
193204
(status_headers, stream) = self.fetch_local_file(url)
194205

195206
if timestamp is None:

pywb/utils/loaders.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,13 @@ def load_file_or_resource(self, url, offset=0, length=-1):
129129
# if starting with . or /, can only be a file path..
130130
file_only = url.startswith(('/', '.'))
131131

132-
if url.startswith('file://'):
133-
url = url[len('file://'):]
134-
file_only = True
135-
136132
try:
137133
# first, try as file
138-
afile = open(url, 'rb')
134+
if url.startswith('file://'):
135+
file_only = True
136+
afile = urllib.urlopen(url)
137+
else:
138+
afile = open(url, 'rb')
139139

140140
except IOError:
141141
if file_only:

pywb/utils/test/test_bufferedreaders.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
#=================================================================
44
55
# DecompressingBufferedReader readline()
6-
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
6+
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
77
' CDX N b a m s k r M S V g\n'
88
99
# detect not compressed
10-
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
10+
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
1111
' CDX N b a m s k r M S V g\n'
1212
1313
# decompress with on the fly compression, default gzip compression

pywb/utils/test/test_loaders.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
100
2626
2727
# no length specified, read full amount requested
28-
>>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400))
28+
>>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
2929
400
3030
3131
# HMAC Cookie Maker
@@ -65,6 +65,8 @@
6565
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
6666
from pywb.utils.loaders import LimitReader, extract_client_cookie
6767

68+
from urllib import pathname2url
69+
6870
from pywb import get_test_dir
6971

7072
test_cdx_dir = get_test_dir() + 'cdx/'

pywb/warc/test/test_indexing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@
160160
TEST_WARC_DIR = get_test_dir() + 'warcs/'
161161

162162
def read_fully(cdx):
163-
with open(TEST_CDX_DIR + cdx) as fh:
163+
with open(TEST_CDX_DIR + cdx, 'rU') as fh:
164164
curr = BytesIO()
165165
while True:
166166
b = fh.read()
@@ -172,7 +172,7 @@ def read_fully(cdx):
172172
def cdx_index(warc, **options):
173173
buff = BytesIO()
174174

175-
with open(TEST_WARC_DIR + warc) as fh:
175+
with open(TEST_WARC_DIR + warc, 'rU') as fh:
176176
write_cdx_index(buff, fh, warc, **options)
177177

178178
return buff.getvalue()
@@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
213213

214214
print filename
215215

216-
with open(os.path.join(tmp_dir, filename), 'r') as fh:
216+
with open(os.path.join(tmp_dir, filename), 'rU') as fh:
217217
lines = fh.read(8192).rstrip().split('\n')
218218

219219
finally:

setup.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ def run_tests(self):
5858
'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
5959
},
6060
data_files=[
61-
('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')),
62-
('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')),
63-
('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')),
64-
('sample_archive/text_content/',
61+
('sample_archive/cdx', glob.glob('sample_archive/cdx/*')),
62+
('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')),
63+
('sample_archive/warcs', glob.glob('sample_archive/warcs/*')),
64+
('sample_archive/text_content',
6565
glob.glob('sample_archive/text_content/*')),
6666
],
6767
install_requires=[

0 commit comments

Comments
 (0)