Skip to content

Commit db75bda

Browse files
committed
file open() pass: convert all read and write to ensure binary 'b' flag is set (#56)
1 parent fb4bf81 commit db75bda

File tree

9 files changed

+14
-14
lines changed

9 files changed

+14
-14
lines changed

pywb/cdx/cdxsource.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ def __init__(self, filename):
3030
def load_cdx(self, query):
3131
def do_open():
3232
try:
33-
source = open(self.filename)
33+
source = open(self.filename, 'rb')
3434
gen = iter_range(source, query.key, query.end_key)
3535
for line in gen:
3636
yield line

pywb/cdx/test/test_redis_source.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626

2727
def load_cdx_into_redis(source, filename, key=None):
2828
# load a cdx into mock redis
29-
with open(test_cdx_dir + filename) as fh:
29+
with open(test_cdx_dir + filename, 'rb') as fh:
3030
for line in fh:
3131
zadd_cdx(source, line, key)
3232

pywb/cdx/zipnum.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ def load_loc(self):
8484
self.loc_mtime = new_mtime
8585

8686
logging.debug('Loading loc from: ' + self.loc_filename)
87-
with open(self.loc_filename) as fh:
87+
with open(self.loc_filename, 'rb') as fh:
8888
for line in fh:
8989
parts = line.rstrip().split('\t')
9090
self.loc_map[parts[0]] = parts[1:]
@@ -112,7 +112,7 @@ def lookup_loc(self, part):
112112
def load_cdx(self, query):
113113
self.load_loc()
114114

115-
reader = open(self.summary)
115+
reader = open(self.summary, 'rb')
116116

117117
idx_iter = iter_range(reader,
118118
query.key,

pywb/framework/proxy.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -334,7 +334,7 @@ def handle_cert_install(self, env):
334334
return None
335335

336336
buff = ''
337-
with open(self.ca.ca_file) as fh:
337+
with open(self.ca.ca_file, 'rb') as fh:
338338
buff = fh.read()
339339

340340
content_type = 'application/x-x509-ca-cert'

pywb/utils/test/test_binsearch.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,12 +66,12 @@
6666
test_cdx_dir = get_test_dir() + 'cdx/'
6767

6868
def print_binsearch_results(key, iter_func):
69-
with open(test_cdx_dir + 'iana.cdx') as cdx:
69+
with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
7070
for line in iter_func(cdx, key):
7171
print line
7272

7373
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
74-
with open(test_cdx_dir + 'iana.cdx') as cdx:
74+
with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
7575
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
7676
print line
7777

pywb/utils/test/test_bufferedreaders.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,11 @@
33
#=================================================================
44
55
# DecompressingBufferedReader readline()
6-
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
6+
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
77
' CDX N b a m s k r M S V g\n'
88
99
# detect not compressed
10-
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
10+
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
1111
' CDX N b a m s k r M S V g\n'
1212
1313
# decompress with on the fly compression, default gzip compression

pywb/warc/cdxindexer.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ def write_multi_cdx_index(output, inputs, **options):
115115
outpath = cdx_filename(filename)
116116
outpath = os.path.join(output, outpath)
117117

118-
with open(outpath, 'w') as outfile:
118+
with open(outpath, 'wb') as outfile:
119119
with open(fullpath, 'rb') as infile:
120120
write_cdx_index(outfile, infile, filename, **options)
121121

@@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options):
124124
if output == '-':
125125
outfile = sys.stdout
126126
else:
127-
outfile = open(output, 'w')
127+
outfile = open(output, 'wb')
128128

129129
if options.get('sort'):
130130
writer_cls = SortedCDXWriter

pywb/warc/pathresolvers.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def __repr__(self):
5757
class PathIndexResolver:
5858
def __init__(self, pathindex_file):
5959
self.pathindex_file = pathindex_file
60-
self.reader = open(pathindex_file)
60+
self.reader = open(pathindex_file, 'rb')
6161

6262
def __call__(self, filename):
6363
result = iter_exact(self.reader, filename, '\t')

pywb/warc/test/test_indexing.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@
160160
TEST_WARC_DIR = get_test_dir() + 'warcs/'
161161

162162
def read_fully(cdx):
163-
with open(TEST_CDX_DIR + cdx, 'rU') as fh:
163+
with open(TEST_CDX_DIR + cdx, 'rb') as fh:
164164
curr = BytesIO()
165165
while True:
166166
b = fh.read()
@@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
213213

214214
print filename
215215

216-
with open(os.path.join(tmp_dir, filename), 'rU') as fh:
216+
with open(os.path.join(tmp_dir, filename), 'rb') as fh:
217217
lines = fh.read(8192).rstrip().split('\n')
218218

219219
finally:

0 commit comments

Comments
 (0)