Skip to content

Commit cf0a215

Browse files
committed
loaders: add to_file_url() for converting a filename to a file:// URL,
used in live rewrite and tests
1 parent ba853a4 commit cf0a215

File tree

4 files changed

+25
-27
lines changed

4 files changed

+25
-27
lines changed

pywb/rewrite/rewrite_live.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
import logging
99
import os
1010

11-
from urlparse import urlsplit, urljoin
12-
from urllib import pathname2url
11+
from urlparse import urlsplit
1312

14-
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
13+
from pywb.utils.loaders import is_http, LimitReader, BlockLoader, to_file_url
1514
from pywb.utils.loaders import extract_client_cookie
1615
from pywb.utils.timeutils import datetime_to_timestamp
1716
from pywb.utils.statusandheaders import StatusAndHeaders
@@ -187,8 +186,7 @@ def fetch_request(self, url, urlrewriter,
187186
else:
188187
is_remote = False
189188
if not url.startswith('file:'):
190-
url = os.path.abspath(url)
191-
url = urljoin('file:', pathname2url(url))
189+
url = to_file_url(url)
192190

193191
# explicit urlkey may be passed in (say for testing)
194192
if not urlkey:

pywb/utils/loaders.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import hmac
88
import urllib
99
import urllib2
10+
import urlparse
1011
import time
1112
import pkg_resources
1213
from io import open
@@ -17,6 +18,15 @@ def is_http(filename):
1718
return filename.startswith(('http://', 'https://'))
1819

1920

21+
#=================================================================
22+
def to_file_url(filename):
23+
""" Convert a filename to a file:// url
24+
"""
25+
url = os.path.abspath(filename)
26+
url = urlparse.urljoin('file:', urllib.pathname2url(url))
27+
return url
28+
29+
2030
#=================================================================
2131
def load_yaml_config(config_file):
2232
import yaml
@@ -129,13 +139,14 @@ def load_file_or_resource(self, url, offset=0, length=-1):
129139
# if starting with . or /, can only be a file path..
130140
file_only = url.startswith(('/', '.'))
131141

142+
# convert to filename
143+
if url.startswith('file://'):
144+
file_only = True
145+
url = urllib.url2pathname(url[len('file://'):])
146+
132147
try:
133148
# first, try as file
134-
if url.startswith('file://'):
135-
file_only = True
136-
afile = urllib.urlopen(url)
137-
else:
138-
afile = open(url, 'rb')
149+
afile = open(url, 'rb')
139150

140151
except IOError:
141152
if file_only:

pywb/utils/test/test_loaders.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
100
2626
2727
# no length specified, read full amount requested
28-
>>> len(BlockLoader().load(to_local_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
28+
>>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
2929
400
3030
3131
# HMAC Cookie Maker
@@ -63,11 +63,9 @@
6363
import re
6464
import os
6565
from io import BytesIO
66-
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
66+
from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url
6767
from pywb.utils.loaders import LimitReader, extract_client_cookie
6868

69-
from urllib import pathname2url
70-
7169
from pywb import get_test_dir
7270

7371
test_cdx_dir = get_test_dir() + 'cdx/'
@@ -84,9 +82,6 @@ def seek_read_full(seekable_reader, offset):
8482
seekable_reader.readline() #skip
8583
return seekable_reader.readline()
8684

87-
def to_local_url(filename):
88-
filename = os.path.abspath(filename)
89-
return 'file://' + pathname2url(filename)
9085

9186
if __name__ == "__main__":
9287
import doctest

pywb/warc/test/test_pathresolvers.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@
3333
RedisResolver('redis://myhost.example.com:1234/1')
3434
3535
# a file
36-
>>> r = make_best_resolver(to_local_url(os.path.realpath(__file__)))
36+
>>> r = make_best_resolver(to_file_url(os.path.realpath(__file__)))
3737
>>> r.__class__.__name__
3838
'PathIndexResolver'
3939
4040
# a dir
4141
>>> path = os.path.realpath(__file__)
42-
>>> r = make_best_resolver(to_local_url(os.path.dirname(path)))
42+
>>> r = make_best_resolver(to_file_url(os.path.dirname(path)))
4343
>>> r.__class__.__name__
4444
'PrefixResolver'
4545
@@ -54,9 +54,9 @@
5454
from pywb import get_test_dir
5555
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
5656
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
57-
import os
57+
from pywb.utils.loaders import to_file_url
5858

59-
from urllib import pathname2url
59+
import os
6060

6161
from fakeredis import FakeStrictRedis
6262
from mock import patch
@@ -69,12 +69,6 @@ def init_redis_resolver():
6969
def hset_path(filename, path):
7070
redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path)
7171

72-
def to_local_url(filename):
73-
filename = os.path.abspath(filename)
74-
res = 'file:' + pathname2url(filename)
75-
#print(res)
76-
return res
77-
7872
redis_resolver = init_redis_resolver()
7973

8074
#=================================================================

0 commit comments

Comments
 (0)