Commit b3bac7e: Add Huggingface benchmark
1 parent: ee843cd
18 files changed: +600250 -168 lines

Cargo.toml

Lines changed: 1 addition & 0 deletions
```diff
@@ -2,6 +2,7 @@
 
 members = [
     "crates/*",
+    "crates/bpe/benchmarks",
 ]
 resolver = "2"
 
```
crates/bpe/Cargo.toml

Lines changed: 0 additions & 7 deletions
```diff
@@ -12,12 +12,6 @@ categories = ["algorithms", "data-structures", "encoding", "science"]
 crate-type = ["lib", "staticlib"]
 bench = false
 
-[[bench]]
-name = "performance"
-path = "benches/performance.rs"
-harness = false
-test = false
-
 [features]
 rand = ["dep:rand"]
 tiktoken-rs = ["dep:tiktoken-rs"]
@@ -33,4 +27,3 @@ tiktoken-rs = { version = "0.5", optional = true }
 
 [dev-dependencies]
 bpe = { path = ".", features = ["rand", "tiktoken-rs"] }
-criterion = "0.5"
```

crates/bpe/README.md

Lines changed: 13 additions & 7 deletions
````diff
@@ -183,8 +183,8 @@ On average it is about ~4 faster, since the short-cuts usually pay off.
 
 ## Benchmarks
 
-We ran several benchmarks to compare performance of different encoders and a tiktoken implementation.
-For the tiktoken implementation we used [tiktoken-rs](https://crates.io/crates/tiktoken-rs) library, a wrapper around OpenAI's tiktoken implementation.
+We ran several benchmarks to compare performance of different encoders, and tiktoken and Huggingface tokenizers.
+We used [tiktoken-rs](https://crates.io/crates/tiktoken-rs), a wrapper around OpenAI's tiktoken implementation, and Huggingface's [tokenizers](https://crates.io/crates/tokenizers).
 Note that tiktoken does not run BPE on the full input text.
 Instead it splits it into large chunks using a regex and runs BPE on the individual chunks.
 We have not tried to see if that approach is compatible with our BPE implementation.
@@ -225,13 +225,13 @@ The backtracking encoder, the fastest encoder that still returns correct results
 The fully dynamic programming solution and the heap implementation are still quite competitive to TikToken (especially for smaller inputs).
 If the requirement of correct BPE output can be relaxed, then the Greedy approach or the minimal encoding approach are the clear winners.
 
-![encoding runtime comparison](./benches/result/encoding-o200k.svg)
+![encoding runtime comparison](./images/performance-encoding.svg)
 
 The graph below shows encoding results for input that is particularly challenging for tiktoken.
 The input consists of random ranges taken from the continuous list of all Unicode code points excluding whitespace.
 This inhibits tiktoken ability to split the input before applying BPE revealing its quadratic runtime complexity.
 
-![worst-case encoding runtime comparison](./benches/result/worstcase-o200k.svg)
+![worst-case encoding runtime comparison](./images/performance-worstcase.svg)
 
 ### Incremental encoding
 
@@ -246,7 +246,7 @@ The graph below shows encoding runtime vs slice length.
 The overall runtime of byte-by-byte incremental encoder for encoding the full text is comparable to the runtime of the backtracking encoder, with only a constant factor overhead.
 Note that this is a huge win for incremental use cases, which would otherwise require retokenization after each append, resulting in a quadratic slowdown.
 
-![appending runtime comparison](./benches/result/appending-o200k.svg)
+![appending runtime comparison](./images/performance-appending.svg)
 
 ### Interval counting
 
@@ -264,10 +264,16 @@ The graph below shows counting runtime vs slice length.
 The runtime of the backtracking encoder grows with the length of the slice.
 The interval encoder counts any interval in typically constant time.
 
-![counting runtime comparison](./benches/result/counting-o200k.svg)
+![counting runtime comparison](./images/performance-counting.svg)
 
 ### Running the benchmarks
 
+Benchmarks are located in a separate crate in the `benchmarks` directory.
+
+```sh
+cd benchmarks
+```
+
 Run the benchmark as follows (required [cargo-criterion](https://crates.io/crates/cargo-criterion) installed):
 
 ```sh
@@ -280,5 +286,5 @@ Open the full report which should be located in `target/criterion/reports/index.
 Update the figures in this repo as follows (requires `rsvg-convert` from `librsvg` installed):
 
 ```sh
-script/copy-benchmark-results
+script/copy-results
 ```
````
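
The README change above describes a worst-case benchmark input: random non-whitespace Unicode code points, which defeat tiktoken's regex pre-splitting. That kind of input can be approximated with a short std-only sketch. The `xorshift64` generator and the per-code-point sampling here are illustrative stand-ins for the crate's actual `rand`-based generation, which draws ranges of code points rather than single ones:

```rust
// Sketch: build a pathological input string from random non-whitespace
// code points. The PRNG and the sampling scheme are illustrative only,
// not the benchmark crate's actual code.
fn xorshift64(state: &mut u64) -> u64 {
    // Simple xorshift64 step; stands in for the `rand` crate.
    *state ^= *state << 13;
    *state ^= *state >> 7;
    *state ^= *state << 17;
    *state
}

fn worst_case_input(len: usize, seed: u64) -> String {
    let mut state = seed; // must be nonzero for xorshift
    let mut out = String::new();
    let mut count = 0;
    while count < len {
        // Draw candidate scalar values; keep valid, non-whitespace chars.
        let v = (xorshift64(&mut state) % 0x11_0000) as u32;
        if let Some(c) = char::from_u32(v) {
            if !c.is_whitespace() {
                out.push(c);
                count += 1;
            }
        }
    }
    out
}
```

Because such a string contains no whitespace, a tokenizer that relies on regex pre-splitting sees one huge chunk, which is what exposes the quadratic behaviour described in the README.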

crates/bpe/benchmarks/.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -0,0 +1 @@
+target/
```

crates/bpe/benchmarks/Cargo.toml

Lines changed: 17 additions & 0 deletions
```diff
@@ -0,0 +1,17 @@
+[package]
+name = "bpe-benches"
+edition = "2021"
+
+[[bench]]
+name = "performance"
+path = "performance.rs"
+harness = false
+test = false
+
+[dev-dependencies]
+bpe = { path = "../../bpe", features = ["rand", "tiktoken-rs"] }
+bpe-openai = { path = "../../bpe-openai" }
+criterion = "0.5"
+rand = "0.8"
+tiktoken-rs = "0.5"
+tokenizers = "0.20"
```
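
The manifest above registers a `[[bench]]` target, but the `performance.rs` harness body is not part of this excerpt. The core of what a criterion harness automates (running a workload repeatedly and summarizing the timings robustly) can be sketched with std only; `bench_median_ns` is a hypothetical helper for illustration, not criterion's API:

```rust
use std::time::Instant;

// Sketch of the measurement loop a criterion harness automates:
// run the workload many times and report a robust summary statistic.
// `bench_median_ns` is a hypothetical helper, not part of criterion.
fn bench_median_ns<F: FnMut()>(mut workload: F, samples: usize) -> u128 {
    let mut times: Vec<u128> = Vec::with_capacity(samples);
    for _ in 0..samples {
        let start = Instant::now();
        workload();
        times.push(start.elapsed().as_nanos());
    }
    times.sort_unstable();
    times[samples / 2] // median is less noise-sensitive than the mean
}
```

Criterion additionally handles warm-up, outlier detection, and report generation, which is why the crate uses it instead of a hand-rolled loop like this.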

crates/bpe/benchmarks/criterion.toml

Lines changed: 18 additions & 0 deletions
```diff
@@ -0,0 +1,18 @@
+# save report in this directory, even if a custom target directory is set
+criterion_home = "./target/criterion"
+
+# The colors table allows users to configure the colors used by the charts
+# cargo-criterion generates.
+[colors]
+# Color-blind friendly color scheme from https://personal.sron.nl/~pault/.
+comparison_colors = [
+  { r = 51, g = 34, b = 136 },   # indigo
+  { r = 136, g = 204, b = 238 }, # cyan
+  { r = 68, g = 170, b = 153 },  # teal
+  { r = 17, g = 119, b = 51 },   # green
+  { r = 153, g = 153, b = 51 },  # olive
+  { r = 221, g = 204, b = 119 }, # sand
+  { r = 204, g = 102, b = 119 }, # rose
+  { r = 136, g = 34, b = 85 },   # wine
+  { r = 170, g = 68, b = 153 },  # purple
+]
```
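
The palette in this config is Paul Tol's color-blind friendly scheme; its RGB triples can be checked against their familiar hex forms with a tiny sketch (`to_hex` is an illustrative helper, not part of cargo-criterion):

```rust
// Render an RGB triple like those in criterion.toml as a hex color string.
// `to_hex` is an illustrative helper, not part of cargo-criterion.
fn to_hex(r: u8, g: u8, b: u8) -> String {
    format!("#{:02x}{:02x}{:02x}", r, g, b)
}
```

For example, the first entry `{ r = 51, g = 34, b = 136 }` renders as `#332288`, the indigo of Tol's scheme.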
