1 file changed: +11 -9 lines changed
Original file line number | Diff line number | Diff line change
7
7
import pytest
8
8
9
9
import tiktoken
10
-
11
10
from .test_helpers import ENCODING_FACTORIES , MAX_EXAMPLES
12
11
13
12
@@ -61,13 +60,16 @@ def test_simple_regex():
61
60
def test_basic_encode ():
62
61
enc = tiktoken .get_encoding ("r50k_base" )
63
62
assert enc .encode ("hello world" ) == [31373 , 995 ]
63
+ assert enc .encode ("a" * 1000 ) == [24794 ] * 250
64
64
65
65
enc = tiktoken .get_encoding ("p50k_base" )
66
66
assert enc .encode ("hello world" ) == [31373 , 995 ]
67
+ assert enc .encode ("a" * 1000 ) == [24794 ] * 250
67
68
68
69
enc = tiktoken .get_encoding ("cl100k_base" )
69
70
assert enc .encode ("hello world" ) == [15339 , 1917 ]
70
71
assert enc .encode (" \x850" ) == [220 , 126 , 227 , 15 ]
72
+ assert enc .encode ("a" * 1000 ) == [70540 ] * 125
71
73
72
74
73
75
def test_encode_empty ():
@@ -100,14 +102,14 @@ def test_encode_surrogate_pairs():
100
102
def test_basic_roundtrip (make_enc ):
101
103
enc = make_enc ()
102
104
for value in (
103
- "hello" ,
104
- "hello " ,
105
- "hello  " ,
106
- " hello" ,
107
- " hello " ,
108
- " hello  " ,
109
- "hello world" ,
110
- "请考试我的软件!12345" ,
105
+ "hello" ,
106
+ "hello " ,
107
+ "hello  " ,
108
+ " hello" ,
109
+ " hello " ,
110
+ " hello  " ,
111
+ "hello world" ,
112
+ "请考试我的软件!12345" ,
111
113
):
112
114
assert value == enc .decode (enc .encode (value ))
113
115
assert value == enc .decode (enc .encode_ordinary (value ))
You can’t perform that action at this time.
0 commit comments