Skip to content

Commit 2b3f42f

Browse files
committed
Merge #138: Add buffering during encode
c215c3d Add a buffer when writing (Tobin C. Harding)
94625e7 segwit: Add a buffer when writing (Tobin C. Harding)
596f1f7 Add encode::ByteIter (Tobin C. Harding)

Pull request description:

Currently we write a char at a time to both writers (`fmt::Write` and `std::io::Write`). We can improve performance by first collecting the ASCII bytes of the encoded bech32 string into a buffer on the stack and then writing the buffer in a single call. This is purely an optimization.

The second patch is surprising, at least to me; I would love to learn what I'm doing wrong. I thought this would be valid:

```rust
let mut buf = [0_u8; Ck::CODE_LENGTH]
```

But the compiler does not like the usage of an associated const.

ACKs for top commit:
apoelstra: ACK c215c3d

Tree-SHA512: 41f2f70f44b35736e2a8aec210daa6f73a8e97f31d165c97bc7b32d7b28e066ef29a17f105872071d0f4bc84e5883f2d4cbed65ac748cae663819a316bfda1f9
2 parents ac74415 + c215c3d commit 2b3f42f

File tree

3 files changed

+187
-28
lines changed

3 files changed

+187
-28
lines changed

src/lib.rs

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ pub use {
168168
crate::primitives::{Bech32, Bech32m, NoChecksum},
169169
};
170170

171+
// Length of the stack buffer used when encoding to a writer; small during testing to exercise the full code path.
172+
#[cfg(not(test))]
173+
const BUF_LENGTH: usize = 1024;
174+
#[cfg(test)]
175+
const BUF_LENGTH: usize = 10;
176+
171177
/// Decodes a bech32 encoded string.
172178
///
173179
/// If this function succeeds the input string was found to be well formed (hrp, separator, bech32
@@ -276,11 +282,26 @@ pub fn encode_lower_to_fmt<Ck: Checksum, W: fmt::Write>(
276282
) -> Result<(), EncodeError> {
277283
let _ = encoded_length::<Ck>(hrp, data)?;
278284

285+
let mut buf = [0u8; BUF_LENGTH];
286+
let mut pos = 0;
287+
279288
let iter = data.iter().copied().bytes_to_fes();
280289
let chars = iter.with_checksum::<Ck>(&hrp).chars();
290+
281291
for c in chars {
282-
fmt.write_char(c)?;
292+
buf[pos] = c as u8;
293+
pos += 1;
294+
295+
if pos == BUF_LENGTH {
296+
let s = core::str::from_utf8(&buf).expect("we only write ASCII");
297+
fmt.write_str(s)?;
298+
pos = 0;
299+
}
283300
}
301+
302+
let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
303+
fmt.write_str(s)?;
304+
284305
Ok(())
285306
}
286307

@@ -296,11 +317,25 @@ pub fn encode_upper_to_fmt<Ck: Checksum, W: fmt::Write>(
296317
) -> Result<(), EncodeError> {
297318
let _ = encoded_length::<Ck>(hrp, data)?;
298319

320+
let mut buf = [0u8; BUF_LENGTH];
321+
let mut pos = 0;
322+
299323
let iter = data.iter().copied().bytes_to_fes();
300324
let chars = iter.with_checksum::<Ck>(&hrp).chars();
325+
301326
for c in chars {
302-
fmt.write_char(c.to_ascii_uppercase())?;
327+
buf[pos] = c.to_ascii_uppercase() as u8;
328+
pos += 1;
329+
if pos == BUF_LENGTH {
330+
let s = core::str::from_utf8(&buf).expect("we only write ASCII");
331+
fmt.write_str(s)?;
332+
pos = 0;
333+
}
303334
}
335+
336+
let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
337+
fmt.write_str(s)?;
338+
304339
Ok(())
305340
}
306341

@@ -331,11 +366,23 @@ pub fn encode_lower_to_writer<Ck: Checksum, W: std::io::Write>(
331366
) -> Result<(), EncodeIoError> {
332367
let _ = encoded_length::<Ck>(hrp, data)?;
333368

369+
let mut buf = [0u8; BUF_LENGTH];
370+
let mut pos = 0;
371+
334372
let iter = data.iter().copied().bytes_to_fes();
335373
let chars = iter.with_checksum::<Ck>(&hrp).chars();
374+
336375
for c in chars {
337-
w.write_all(&[c as u8])?;
376+
buf[pos] = c as u8;
377+
pos += 1;
378+
if pos == BUF_LENGTH {
379+
w.write_all(&buf)?;
380+
pos = 0;
381+
}
338382
}
383+
384+
w.write_all(&buf[..pos])?;
385+
339386
Ok(())
340387
}
341388

@@ -352,11 +399,23 @@ pub fn encode_upper_to_writer<Ck: Checksum, W: std::io::Write>(
352399
) -> Result<(), EncodeIoError> {
353400
let _ = encoded_length::<Ck>(hrp, data)?;
354401

402+
let mut buf = [0u8; BUF_LENGTH];
403+
let mut pos = 0;
404+
355405
let iter = data.iter().copied().bytes_to_fes();
356406
let chars = iter.with_checksum::<Ck>(&hrp).chars();
407+
357408
for c in chars {
358-
w.write_all(&[c.to_ascii_uppercase() as u8])?;
409+
buf[pos] = c.to_ascii_uppercase() as u8;
410+
pos += 1;
411+
if pos == BUF_LENGTH {
412+
w.write_all(&buf)?;
413+
pos = 0;
414+
}
359415
}
416+
417+
w.write_all(&buf[..pos])?;
418+
360419
Ok(())
361420
}
362421

src/primitives/encode.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@ where
113113
CharIter::new(self.hrp, witver_iter)
114114
}
115115

116+
/// Returns an iterator that yields the bech32 encoded address as field ASCII characters, as
117+
/// byte values.
118+
#[inline]
119+
pub fn bytes(self) -> ByteIter<'hrp, I, Ck> {
120+
let char_iter = self.chars();
121+
ByteIter::new(char_iter)
122+
}
123+
116124
/// Returns an iterator that yields the field elements that go into the checksum, as well as the checksum at the end.
117125
///
118126
/// Each field element yielded has been input into the checksum algorithm (including the HRP as it is fed into the algorithm).
@@ -237,6 +245,43 @@ where
237245
}
238246
}
239247

248+
/// Iterator adaptor which takes a stream of ASCII field elements (an encoded string) and yields a stream of bytes.
249+
///
250+
/// This is equivalent to using the `CharIter` and then casting each character to a byte. Doing
251+
/// so is technically sound because we only yield ASCII characters but it makes for ugly code so
252+
/// we provide this iterator also.
253+
pub struct ByteIter<'hrp, I, Ck>
254+
where
255+
I: Iterator<Item = Fe32>,
256+
Ck: Checksum,
257+
{
258+
char_iter: CharIter<'hrp, I, Ck>,
259+
}
260+
261+
impl<'hrp, I, Ck> ByteIter<'hrp, I, Ck>
262+
where
263+
I: Iterator<Item = Fe32>,
264+
Ck: Checksum,
265+
{
266+
/// Adapts the `CharIter` iterator to yield bytes representing the bech32 encoding as ASCII bytes.
267+
#[inline]
268+
pub fn new(char_iter: CharIter<'hrp, I, Ck>) -> Self { Self { char_iter } }
269+
}
270+
271+
impl<'a, I, Ck> Iterator for ByteIter<'a, I, Ck>
272+
where
273+
I: Iterator<Item = Fe32>,
274+
Ck: Checksum,
275+
{
276+
type Item = u8;
277+
278+
#[inline]
279+
fn next(&mut self) -> Option<u8> { self.char_iter.next().map(|c| c as u8) }
280+
281+
#[inline]
282+
fn size_hint(&self) -> (usize, Option<usize>) { self.char_iter.size_hint() }
283+
}
284+
240285
/// Iterator adaptor for a checksummed iterator that inputs the HRP into the checksum algorithm
241286
/// before yielding the HRP as field elements followed by the data then checksum.
242287
pub struct Fe32Iter<'hrp, I, Ck>
@@ -344,4 +389,19 @@ mod tests {
344389
let checksummed_len = 2 + 1 + 1 + char_len + 6; // bc + SEP + Q + chars + checksum
345390
assert_eq!(iter.size_hint().0, checksummed_len);
346391
}
392+
393+
#[test]
394+
#[cfg(feature = "alloc")]
395+
fn hrpstring_iter_bytes() {
396+
let hrp = Hrp::parse_unchecked("bc");
397+
let fes = DATA.iter().copied().bytes_to_fes();
398+
let iter = fes.with_checksum::<Bech32>(&hrp).with_witness_version(Fe32::Q);
399+
400+
let chars = iter.clone().chars();
401+
let bytes = iter.bytes();
402+
403+
for (c, b) in chars.zip(bytes) {
404+
assert_eq!(c as u8, b)
405+
}
406+
}
347407
}

src/segwit.rs

Lines changed: 64 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -156,19 +156,30 @@ pub fn encode_lower_to_fmt_unchecked<W: fmt::Write>(
156156
witness_version: Fe32,
157157
witness_program: &[u8],
158158
) -> fmt::Result {
159+
let mut buf = [0u8; MAX_STRING_LENGTH];
160+
let mut pos = 0;
161+
159162
let iter = witness_program.iter().copied().bytes_to_fes();
160163
match witness_version {
161164
VERSION_0 => {
162-
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
163-
fmt.write_char(c)?;
164-
}
165+
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
166+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
167+
*dst = src;
168+
pos += 1;
169+
});
165170
}
166171
version => {
167-
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
168-
fmt.write_char(c)?;
169-
}
172+
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
173+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
174+
*dst = src;
175+
pos += 1;
176+
});
170177
}
171178
}
179+
180+
let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
181+
fmt.write_str(s)?;
182+
172183
Ok(())
173184
}
174185

@@ -185,20 +196,30 @@ pub fn encode_upper_to_fmt_unchecked<W: fmt::Write>(
185196
witness_version: Fe32,
186197
witness_program: &[u8],
187198
) -> fmt::Result {
199+
let mut buf = [0u8; MAX_STRING_LENGTH];
200+
let mut pos = 0;
201+
188202
let iter = witness_program.iter().copied().bytes_to_fes();
189203
match witness_version {
190204
VERSION_0 => {
191-
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
192-
fmt.write_char(c.to_ascii_uppercase())?;
193-
}
205+
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
206+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
207+
*dst = src.to_ascii_uppercase();
208+
pos += 1;
209+
});
194210
}
195211
version => {
196-
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
197-
fmt.write_char(c.to_ascii_uppercase())?;
198-
}
212+
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
213+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
214+
*dst = src.to_ascii_uppercase();
215+
pos += 1;
216+
});
199217
}
200218
}
201219

220+
let s = core::str::from_utf8(&buf[..pos]).expect("we only write ASCII");
221+
fmt.write_str(s)?;
222+
202223
Ok(())
203224
}
204225

@@ -229,19 +250,29 @@ pub fn encode_lower_to_writer_unchecked<W: std::io::Write>(
229250
witness_version: Fe32,
230251
witness_program: &[u8],
231252
) -> std::io::Result<()> {
253+
let mut buf = [0u8; MAX_STRING_LENGTH];
254+
let mut pos = 0;
255+
232256
let iter = witness_program.iter().copied().bytes_to_fes();
233257
match witness_version {
234258
VERSION_0 => {
235-
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
236-
w.write_all(&[c.to_ascii_lowercase() as u8])?;
237-
}
259+
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
260+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
261+
*dst = src;
262+
pos += 1;
263+
});
238264
}
239265
version => {
240-
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
241-
w.write_all(&[c.to_ascii_lowercase() as u8])?;
242-
}
266+
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
267+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
268+
*dst = src;
269+
pos += 1;
270+
});
243271
}
244272
}
273+
274+
w.write_all(&buf[..pos])?;
275+
245276
Ok(())
246277
}
247278

@@ -259,20 +290,29 @@ pub fn encode_upper_to_writer_unchecked<W: std::io::Write>(
259290
witness_version: Fe32,
260291
witness_program: &[u8],
261292
) -> std::io::Result<()> {
293+
let mut buf = [0u8; MAX_STRING_LENGTH];
294+
let mut pos = 0;
295+
262296
let iter = witness_program.iter().copied().bytes_to_fes();
263297
match witness_version {
264298
VERSION_0 => {
265-
for c in iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).chars() {
266-
w.write_all(&[c.to_ascii_uppercase() as u8])?;
267-
}
299+
let bytes = iter.with_checksum::<Bech32>(&hrp).with_witness_version(VERSION_0).bytes();
300+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
301+
*dst = src.to_ascii_uppercase();
302+
pos += 1;
303+
});
268304
}
269305
version => {
270-
for c in iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).chars() {
271-
w.write_all(&[c.to_ascii_uppercase() as u8])?;
272-
}
306+
let bytes = iter.with_checksum::<Bech32m>(&hrp).with_witness_version(version).bytes();
307+
buf.iter_mut().zip(bytes).for_each(|(dst, src)| {
308+
*dst = src.to_ascii_uppercase();
309+
pos += 1;
310+
});
273311
}
274312
}
275313

314+
w.write_all(&buf[..pos])?;
315+
276316
Ok(())
277317
}
278318

0 commit comments

Comments
 (0)