Skip to content

Commit feabc11

Browse files
committed
Merge pull request #14 from WGH-/remove-utf8-sniffing
- Fixed type information. Fixes #6
2 parents e8290df + 44230fa commit feabc11

File tree

3 files changed: 36 additions and 108 deletions

bencode/__init__.py

Lines changed: 10 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -60,20 +60,9 @@ def decode_int(x, f):
6060
return n, newf + 1
6161

6262

63-
def decode_string(x, f, try_decode_utf8=True, force_decode_utf8=False):
64-
# type: (bytes, int, bool, bool) -> Tuple[bytes, int]
63+
def decode_string(x, f):
64+
# type: (bytes, int) -> Tuple[bytes, int]
6565
"""Decode torrent bencoded 'string' in x starting at f.
66-
67-
An attempt is made to convert the string to a python string from utf-8.
68-
However, both string and non-string binary data is intermixed in the
69-
torrent bencoding standard. So we have to guess whether the byte
70-
sequence is a string or just binary data. We make this guess by trying
71-
to decode (from utf-8), and if that fails, assuming it is binary data.
72-
There are some instances where the data SHOULD be a string though.
73-
You can check enforce this by setting force_decode_utf8 to True. If the
74-
decoding from utf-8 fails, an UnidcodeDecodeError is raised. Similarly,
75-
if you know it should not be a string, you can skip the decoding
76-
attempt by setting try_decode_utf8=False.
7766
"""
7867
colon = x.index(b':', f)
7968
n = int(x[f:colon])
@@ -84,13 +73,6 @@ def decode_string(x, f, try_decode_utf8=True, force_decode_utf8=False):
8473
colon += 1
8574
s = x[colon:colon + n]
8675

87-
if try_decode_utf8:
88-
try:
89-
return s.decode('utf-8'), colon + n
90-
except UnicodeDecodeError:
91-
if force_decode_utf8:
92-
raise
93-
9476
return bytes(s), colon + n
9577

9678

@@ -135,7 +117,7 @@ def decode_dict(x, f, force_sort=True):
135117
r, f = OrderedDict(), f + 1
136118

137119
while x[f:f + 1] != b'e':
138-
k, f = decode_string(x, f, force_decode_utf8=True)
120+
k, f = decode_string(x, f)
139121
r[k], f = decode_func[x[f:f + 1]](x, f)
140122

141123
if force_sort:
@@ -219,13 +201,7 @@ def encode_bytes(x, r):
219201

220202
def encode_string(x, r):
221203
# type: (str, Deque[bytes]) -> None
222-
try:
223-
s = x.encode('utf-8')
224-
except UnicodeDecodeError:
225-
encode_bytes(x, r)
226-
return
227-
228-
r.extend((str(len(s)).encode('utf-8'), b':', s))
204+
return encode_bytes(x.encode("UTF-8"), r)
229205

230206

231207
def encode_list(x, r):
@@ -241,12 +217,13 @@ def encode_list(x, r):
241217
def encode_dict(x, r):
242218
# type: (Dict, Deque[bytes]) -> None
243219
r.append(b'd')
244-
ilist = list(x.items())
245-
ilist.sort()
220+
221+
# force all keys to bytes, because str and bytes are incomparable
222+
ilist = [(k if type(k) == type(b"") else k.encode("UTF-8"), v) for k, v in x.items()]
223+
ilist.sort(key=lambda kv: kv[0])
246224

247225
for k, v in ilist:
248-
k = k.encode('utf-8')
249-
r.extend((str(len(k)).encode('utf-8'), b':', k))
226+
encode_func[type(k)](k, r)
250227
encode_func[type(v)](v, r)
251228

252229
r.append(b'e')
@@ -263,7 +240,7 @@ def encode_dict(x, r):
263240
encode_func[IntType] = encode_int
264241
encode_func[ListType] = encode_list
265242
encode_func[LongType] = encode_int
266-
encode_func[StringType] = encode_string
243+
encode_func[StringType] = encode_bytes
267244
encode_func[TupleType] = encode_list
268245
encode_func[UnicodeType] = encode_string
269246

tests/bencode_tests.py

Lines changed: 14 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -15,66 +15,43 @@
1515

1616

1717
VALUES = [
18-
(0, 'i0e'),
19-
(1, 'i1e'),
20-
(10, 'i10e'),
21-
(42, 'i42e'),
22-
(-42, 'i-42e'),
23-
(True, 'i1e'),
24-
(False, 'i0e'),
25-
('spam', '4:spam'),
26-
('parrot sketch', '13:parrot sketch'),
27-
(['parrot sketch', 42], 'l13:parrot sketchi42ee'),
28-
({'foo': 42, 'bar': 'spam'}, 'd3:bar4:spam3:fooi42ee')
18+
(0, b'i0e'),
19+
(1, b'i1e'),
20+
(10, b'i10e'),
21+
(42, b'i42e'),
22+
(-42, b'i-42e'),
23+
(True, b'i1e'),
24+
(False, b'i0e'),
25+
(b'spam', b'4:spam'),
26+
(b'parrot sketch', b'13:parrot sketch'),
27+
([b'parrot sketch', 42], b'l13:parrot sketchi42ee'),
28+
({b'foo': 42, b'bar': b'spam'}, b'd3:bar4:spam3:fooi42ee')
2929
]
3030

3131
if OrderedDict is not None:
3232
VALUES.append((OrderedDict((
33-
('bar', 'spam'),
34-
('foo', 42)
35-
)), 'd3:bar4:spam3:fooi42ee'))
33+
(b'bar', b'spam'),
34+
(b'foo', 42)
35+
)), b'd3:bar4:spam3:fooi42ee'))
3636

3737

38-
@pytest.mark.skipif(sys.version_info[0] < 3, reason="Requires: Python 3+")
3938
def test_encode():
40-
"""Encode should give known result with known input."""
41-
for plain, encoded in VALUES:
42-
assert encoded.encode('utf-8') == bencode(plain)
43-
44-
45-
@pytest.mark.skipif(sys.version_info[0] != 2, reason="Requires: Python 2")
46-
def test_encode_py2():
4739
"""Encode should give known result with known input."""
4840
for plain, encoded in VALUES:
4941
assert encoded == bencode(plain)
5042

5143

52-
@pytest.mark.skipif(sys.version_info[0] < 3, reason="Requires: Python 3+")
5344
def test_encode_bencached():
5445
"""Ensure Bencached objects can be encoded."""
5546
assert bencode([Bencached(bencode('test'))]) == b'l4:teste'
5647

5748

58-
@pytest.mark.skipif(sys.version_info[0] != 2, reason="Requires: Python 2")
59-
def test_encode_bencached_py2():
60-
"""Ensure Bencached objects can be encoded."""
61-
assert bencode([Bencached(bencode('test'))]) == 'l4:teste'
62-
63-
6449
def test_encode_bytes():
6550
"""Ensure bytes can be encoded."""
6651
assert bencode(b'\x9c') == b'1:\x9c'
6752

6853

69-
@pytest.mark.skipif(sys.version_info[0] < 3, reason="Requires: Python 3+")
7054
def test_decode():
71-
"""Decode should give known result with known input."""
72-
for plain, encoded in VALUES:
73-
assert plain == bdecode(encoded.encode('utf-8'))
74-
75-
76-
@pytest.mark.skipif(sys.version_info[0] != 2, reason="Requires: Python 2")
77-
def test_decode_py2():
7855
"""Decode should give known result with known input."""
7956
for plain, encoded in VALUES:
8057
assert plain == bdecode(encoded)
@@ -85,15 +62,7 @@ def test_decode_bytes():
8562
assert bdecode(b'1:\x9c') == b'\x9c'
8663

8764

88-
@pytest.mark.skipif(sys.version_info[0] < 3, reason="Requires: Python 3+")
8965
def test_encode_roundtrip():
90-
"""Consecutive calls to decode and encode should deliver the original data again."""
91-
for plain, encoded in VALUES:
92-
assert encoded.encode('utf-8') == bencode(bdecode(encoded.encode('utf-8')))
93-
94-
95-
@pytest.mark.skipif(sys.version_info[0] != 2, reason="Requires: Python 2")
96-
def test_encode_roundtrip_py2():
9766
"""Consecutive calls to decode and encode should deliver the original data again."""
9867
for plain, encoded in VALUES:
9968
assert encoded == bencode(bdecode(encoded))
@@ -142,33 +111,15 @@ def test_dictionary_sorted():
142111
assert encoded.index(b'zoo') > encoded.index(b'bar')
143112

144113

145-
@pytest.mark.skipif(sys.version_info[0] < 3, reason="Requires: Python 3+")
146114
def test_dictionary_unicode():
147115
"""Test the handling of unicode in dictionaries."""
148116
encoded = bencode({u'foo': 42, 'bar': {u'sketch': u'parrot', 'foobar': 23}})
149117

150118
assert encoded == 'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'.encode('utf-8')
151119

152120

153-
@pytest.mark.skipif(sys.version_info[0] != 2, reason="Requires: Python 2")
154-
def test_dictionary_unicode_py2():
155-
"""Test the handling of unicode in dictionaries."""
156-
encoded = bencode({u'foo': 42, 'bar': {u'sketch': u'parrot', 'foobar': 23}})
157-
158-
assert encoded == 'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'
159-
160-
161-
@pytest.mark.skipif(sys.version_info[0] < 3, reason="Requires: Python 3+")
162121
def test_dictionary_nested():
163122
"""Test the handling of nested dictionaries."""
164123
encoded = bencode({'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}})
165124

166125
assert encoded == 'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'.encode('utf-8')
167-
168-
169-
@pytest.mark.skipif(sys.version_info[0] != 2, reason="Requires: Python 2")
170-
def test_dictionary_nested_py2():
171-
"""Test the handling of nested dictionaries."""
172-
encoded = bencode({'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}})
173-
174-
assert encoded == 'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'

tests/file_tests.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -22,14 +22,14 @@ def test_read_file():
2222
with open(os.path.join(FIXTURE_DIR, 'alpha'), 'rb') as fp:
2323
data = bread(fp)
2424

25-
assert data == {'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}}
25+
assert data == {b'foo': 42, b'bar': {b'sketch': b'parrot', b'foobar': 23}}
2626

2727

2828
def test_read_path():
2929
"""Test the reading of bencode paths."""
3030
data = bread(os.path.join(FIXTURE_DIR, 'alpha'))
3131

32-
assert data == {'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}}
32+
assert data == {b'foo': 42, b'bar': {b'sketch': b'parrot', b'foobar': 23}}
3333

3434

3535
@pytest.mark.skipif(sys.version_info < (3, 4), reason="Requires: Python 3.4+")
@@ -39,30 +39,30 @@ def test_read_pathlib():
3939

4040
data = bread(Path(FIXTURE_DIR, 'alpha'))
4141

42-
assert data == {'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}}
42+
assert data == {b'foo': 42, b'bar': {b'sketch': b'parrot', b'foobar': 23}}
4343

4444

4545
def test_write_file():
4646
"""Test the writing of bencode paths."""
4747
with open(os.path.join(TEMP_DIR, 'beta'), 'wb') as fp:
4848
bwrite(
49-
{'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}},
49+
{b'foo': 42, b'bar': {b'sketch': b'parrot', b'foobar': 23}},
5050
fp
5151
)
5252

53-
with open(os.path.join(TEMP_DIR, 'beta'), 'r') as fp:
54-
assert fp.read() == 'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'
53+
with open(os.path.join(TEMP_DIR, 'beta'), 'rb') as fp:
54+
assert fp.read() == b'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'
5555

5656

5757
def test_write_path():
5858
"""Test the writing of bencode files."""
5959
bwrite(
60-
{'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}},
60+
{b'foo': 42, b'bar': {b'sketch': b'parrot', b'foobar': 23}},
6161
os.path.join(TEMP_DIR, 'beta')
6262
)
6363

64-
with open(os.path.join(TEMP_DIR, 'beta'), 'r') as fp:
65-
assert fp.read() == 'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'
64+
with open(os.path.join(TEMP_DIR, 'beta'), 'rb') as fp:
65+
assert fp.read() == b'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'
6666

6767

6868
@pytest.mark.skipif(sys.version_info < (3, 4), reason="Requires: Python 3.4+")
@@ -71,9 +71,9 @@ def test_write_pathlib():
7171
from pathlib import Path
7272

7373
bwrite(
74-
{'foo': 42, 'bar': {'sketch': 'parrot', 'foobar': 23}},
74+
{b'foo': 42, b'bar': {b'sketch': b'parrot', b'foobar': 23}},
7575
Path(TEMP_DIR, 'beta')
7676
)
7777

78-
with open(os.path.join(TEMP_DIR, 'beta'), 'r') as fp:
79-
assert fp.read() == 'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'
78+
with open(os.path.join(TEMP_DIR, 'beta'), 'rb') as fp:
79+
assert fp.read() == b'd3:bard6:foobari23e6:sketch6:parrote3:fooi42ee'

0 commit comments

Comments
 (0)