Skip to content

Commit cb2f61e

Browse files
committed
Merge #167: decode: Add accessors
2fae3aa Add UncheckedHrpstring::remove_witness_version function (Tobin C. Harding) 0ee999d Add UncheckHrpstring::witness_version function (Tobin C. Harding) b91207c Add ascii accessor methods (Tobin C. Harding) 045b50a decode: Rename data field (Tobin C. Harding) Pull request description: This is a sexy little PR right here. Add an `ascii` accessor method to the `UncheckedHrpstring` and `CheckedHrpstring` types. - Patch 1 is preparation, renames the `data` field. - Patch 2 is the meat and potatoes. Fix: #160 ACKs for top commit: apoelstra: ACK 2fae3aa Tree-SHA512: ce706db35d1119d3a240ffdfc11c4205b7a6052eed57019fa730ccf4643b78b5058a067b112b7bdd0e8fecf5108fe2e0ef6d2c072caeac08a08b461256318cf8
2 parents 84b2c50 + 2fae3aa commit cb2f61e

File tree

1 file changed

+154
-32
lines changed

1 file changed

+154
-32
lines changed

src/primitives/decode.rs

Lines changed: 154 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ pub struct UncheckedHrpstring<'s> {
117117
hrp: Hrp,
118118
/// These are the ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
119119
///
120-
/// Contains the checksum if one was present in the parsed string.
121-
data: &'s [u8],
120+
/// The characters after the separator i.e., the "data part" defined by BIP-173.
121+
data_part_ascii: &'s [u8],
122122
/// The length of the parsed hrpstring.
123123
hrpstring_length: usize,
124124
}
@@ -130,11 +130,11 @@ impl<'s> UncheckedHrpstring<'s> {
130130
#[inline]
131131
pub fn new(s: &'s str) -> Result<Self, UncheckedHrpstringError> {
132132
let sep_pos = check_characters(s)?;
133-
let (hrp, data) = s.split_at(sep_pos);
133+
let (hrp, rest) = s.split_at(sep_pos);
134134

135135
let ret = UncheckedHrpstring {
136136
hrp: Hrp::parse(hrp)?,
137-
data: data[1..].as_bytes(), // Skip the separator.
137+
data_part_ascii: rest[1..].as_bytes(), // Skip the separator.
138138
hrpstring_length: s.len(),
139139
};
140140

@@ -145,6 +145,85 @@ impl<'s> UncheckedHrpstring<'s> {
145145
#[inline]
146146
pub fn hrp(&self) -> Hrp { self.hrp }
147147

148+
/// Returns the data part as ASCII bytes i.e., everything after the separator '1'.
149+
///
150+
/// The byte values are guaranteed to be valid bech32 characters. Includes the checksum
151+
/// if one was present in the parsed string.
152+
///
153+
/// # Examples
154+
///
155+
/// ```
156+
/// use bech32::primitives::decode::UncheckedHrpstring;
157+
///
158+
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
159+
/// let ascii = "qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
160+
///
161+
/// let unchecked = UncheckedHrpstring::new(&addr).unwrap();
162+
/// assert!(unchecked.data_part_ascii().iter().eq(ascii.as_bytes().iter()))
163+
/// ```
164+
#[inline]
165+
pub fn data_part_ascii(&self) -> &[u8] { self.data_part_ascii }
166+
167+
/// Attempts to remove the first byte of the data part, treating it as a witness version.
168+
///
169+
/// If [`Self::witness_version`] succeeds this function removes the first character (witness
170+
/// version byte) from the internal ASCII data part buffer. Future calls to
171+
/// [`Self::data_part_ascii`] will no longer include it.
172+
///
173+
/// # Examples
174+
///
175+
/// ```
176+
/// use bech32::{primitives::decode::UncheckedHrpstring, Fe32};
177+
///
178+
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
179+
/// let ascii = "ar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
180+
///
181+
/// let mut unchecked = UncheckedHrpstring::new(&addr).unwrap();
182+
/// let witness_version = unchecked.remove_witness_version().unwrap();
183+
/// assert_eq!(witness_version, Fe32::Q);
184+
/// assert!(unchecked.data_part_ascii().iter().eq(ascii.as_bytes().iter()))
185+
/// ```
186+
#[inline]
187+
pub fn remove_witness_version(&mut self) -> Option<Fe32> {
188+
self.witness_version().map(|witver| {
189+
self.data_part_ascii = &self.data_part_ascii[1..]; // Remove the witness version byte.
190+
witver
191+
})
192+
}
193+
194+
/// Returns the segwit witness version if there is one.
195+
///
196+
/// Attempts to convert the first character of the data part to a witness version. If this
197+
/// succeeds, and it is a valid version (0 to 16 inclusive), we return it, otherwise `None`.
198+
///
199+
/// This function makes no guarantees on the validity of the checksum.
200+
///
201+
/// # Examples
202+
///
203+
/// ```
204+
/// use bech32::{primitives::decode::UncheckedHrpstring, Fe32};
205+
///
206+
/// // Note the invalid checksum!
207+
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzffffff";
208+
///
209+
/// let unchecked = UncheckedHrpstring::new(&addr).unwrap();
210+
/// assert_eq!(unchecked.witness_version(), Some(Fe32::Q));
211+
/// ```
212+
#[inline]
213+
pub fn witness_version(&self) -> Option<Fe32> {
214+
let data_part = self.data_part_ascii();
215+
if data_part.is_empty() {
216+
return None;
217+
}
218+
219+
// Unwrap ok because the constructor checked that all characters are valid bech32 characters.
220+
let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
221+
if witness_version.to_u8() > 16 {
222+
return None;
223+
}
224+
Some(witness_version)
225+
}
226+
148227
/// Validates that data has a valid checksum for the `Ck` algorithm and returns a [`CheckedHrpstring`].
149228
#[inline]
150229
pub fn validate_and_remove_checksum<Ck: Checksum>(
@@ -183,15 +262,15 @@ impl<'s> UncheckedHrpstring<'s> {
183262
return Ok(());
184263
}
185264

186-
if self.data.len() < Ck::CHECKSUM_LENGTH {
265+
if self.data_part_ascii.len() < Ck::CHECKSUM_LENGTH {
187266
return Err(InvalidLength);
188267
}
189268

190269
let mut checksum_eng = checksum::Engine::<Ck>::new();
191270
checksum_eng.input_hrp(self.hrp());
192271

193272
// Unchecked conversion ok since we checked all characters in our constructor.
194-
for fe in self.data.iter().map(|&b| Fe32::from_char_unchecked(b)) {
273+
for fe in self.data_part_ascii.iter().map(|&b| Fe32::from_char_unchecked(b)) {
195274
checksum_eng.input_fe(fe);
196275
}
197276

@@ -213,20 +292,20 @@ impl<'s> UncheckedHrpstring<'s> {
213292
/// May panic if data is not valid.
214293
#[inline]
215294
pub fn remove_checksum<Ck: Checksum>(self) -> CheckedHrpstring<'s> {
216-
let data_len = self.data.len() - Ck::CHECKSUM_LENGTH;
295+
let end = self.data_part_ascii.len() - Ck::CHECKSUM_LENGTH;
217296

218297
CheckedHrpstring {
219298
hrp: self.hrp(),
220-
data: &self.data[..data_len],
299+
ascii: &self.data_part_ascii[..end],
221300
hrpstring_length: self.hrpstring_length,
222301
}
223302
}
224303
}
225304

226305
/// An HRP string that has been parsed and had the checksum validated.
227306
///
228-
/// This type does not treat the first byte of the data in any special way i.e., as the witness
229-
/// version byte. If you are parsing Bitcoin segwit addresses you likely want to use [`SegwitHrpstring`].
307+
/// This type does not treat the first byte of the data part in any special way i.e., as the witness
308+
/// version byte. If you are parsing Bitcoin segwit addresses consider using [`SegwitHrpstring`].
230309
///
231310
/// > We first describe the general checksummed base32 format called Bech32 and then
232311
/// > define Segregated Witness addresses using it.
@@ -250,9 +329,10 @@ impl<'s> UncheckedHrpstring<'s> {
250329
pub struct CheckedHrpstring<'s> {
251330
/// The human-readable part, guaranteed to be lowercase ASCII characters.
252331
hrp: Hrp,
253-
/// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters,
254-
/// with the checksum removed.
255-
data: &'s [u8],
332+
/// These are the ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
333+
///
334+
/// The characters after the '1' separator and before the checksum.
335+
ascii: &'s [u8],
256336
/// The length of the parsed hrpstring.
257337
hrpstring_length: usize, // Guaranteed to be <= CK::CODE_LENGTH
258338
}
@@ -274,19 +354,38 @@ impl<'s> CheckedHrpstring<'s> {
274354
#[inline]
275355
pub fn hrp(&self) -> Hrp { self.hrp }
276356

357+
/// Returns a partial slice of the data part, as ASCII bytes, everything after the separator '1'
358+
/// and before the checksum.
359+
///
360+
/// The byte values are guaranteed to be valid bech32 characters.
361+
///
362+
/// # Examples
363+
///
364+
/// ```
365+
/// use bech32::{Bech32, primitives::decode::CheckedHrpstring};
366+
///
367+
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
368+
/// let ascii = "qar0srrr7xfkvy5l643lydnw9re59gtzz";
369+
///
370+
/// let checked = CheckedHrpstring::new::<Bech32>(&addr).unwrap();
371+
/// assert!(checked.data_part_ascii_no_checksum().iter().eq(ascii.as_bytes().iter()))
372+
/// ```
373+
#[inline]
374+
pub fn data_part_ascii_no_checksum(&self) -> &[u8] { self.ascii }
375+
277376
/// Returns an iterator that yields the data part of the parsed bech32 encoded string.
278377
///
279378
/// Converts the ASCII bytes representing field elements to the respective field elements, then
280379
/// converts the stream of field elements to a stream of bytes.
281380
#[inline]
282381
pub fn byte_iter(&self) -> ByteIter {
283-
ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() }
382+
ByteIter { iter: AsciiToFe32Iter { iter: self.ascii.iter().copied() }.fes_to_bytes() }
284383
}
285384

286385
/// Converts this type to a [`SegwitHrpstring`] after validating the witness and HRP.
287386
#[inline]
288387
pub fn validate_segwit(mut self) -> Result<SegwitHrpstring<'s>, SegwitHrpstringError> {
289-
if self.data.is_empty() {
388+
if self.ascii.is_empty() {
290389
return Err(SegwitHrpstringError::NoData);
291390
}
292391

@@ -295,28 +394,28 @@ impl<'s> CheckedHrpstring<'s> {
295394
}
296395

297396
// Unwrap ok since check_characters checked the bech32-ness of this char.
298-
let witness_version = Fe32::from_char(self.data[0].into()).unwrap();
299-
self.data = &self.data[1..]; // Remove the witness version byte from data.
397+
let witness_version = Fe32::from_char(self.ascii[0].into()).unwrap();
398+
self.ascii = &self.ascii[1..]; // Remove the witness version byte.
300399

301400
self.validate_padding()?;
302401
self.validate_witness_program_length(witness_version)?;
303402

304-
Ok(SegwitHrpstring { hrp: self.hrp(), witness_version, data: self.data })
403+
Ok(SegwitHrpstring { hrp: self.hrp(), witness_version, ascii: self.ascii })
305404
}
306405

307406
/// Validates the segwit padding rules.
308407
///
309-
/// Must be called after the witness version byte is removed from the data.
408+
/// Must be called after the witness version byte is removed from the data part.
310409
///
311410
/// From BIP-173:
312411
/// > Re-arrange those bits into groups of 8 bits. Any incomplete group at the
313412
/// > end MUST be 4 bits or less, MUST be all zeroes, and is discarded.
314413
fn validate_padding(&self) -> Result<(), PaddingError> {
315-
if self.data.is_empty() {
414+
if self.ascii.is_empty() {
316415
return Ok(()); // Empty data implies correct padding.
317416
}
318417

319-
let fe_iter = AsciiToFe32Iter { iter: self.data.iter().copied() };
418+
let fe_iter = AsciiToFe32Iter { iter: self.ascii.iter().copied() };
320419
let padding_len = fe_iter.len() * 5 % 8;
321420

322421
if padding_len > 4 {
@@ -343,7 +442,7 @@ impl<'s> CheckedHrpstring<'s> {
343442

344443
/// Validates the segwit witness length rules.
345444
///
346-
/// Must be called after the witness version byte is removed from the data.
445+
/// Must be called after the witness version byte is removed from the data part.
347446
fn validate_witness_program_length(
348447
&self,
349448
witness_version: Fe32,
@@ -372,11 +471,12 @@ impl<'s> CheckedHrpstring<'s> {
372471
pub struct SegwitHrpstring<'s> {
373472
/// The human-readable part, valid for segwit addresses.
374473
hrp: Hrp,
375-
/// The first byte of the parsed data.
474+
/// The first byte of the parsed data part.
376475
witness_version: Fe32,
377-
/// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters,
378-
/// with the witness version and checksum removed.
379-
data: &'s [u8],
476+
/// These are the ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
477+
///
478+
/// The characters after the witness version and before the checksum.
479+
ascii: &'s [u8],
380480
}
381481

382482
impl<'s> SegwitHrpstring<'s> {
@@ -396,12 +496,14 @@ impl<'s> SegwitHrpstring<'s> {
396496

397497
let unchecked = UncheckedHrpstring::new(s)?;
398498

399-
if unchecked.data.is_empty() {
499+
let data_part = unchecked.data_part_ascii();
500+
501+
if data_part.is_empty() {
400502
return Err(SegwitHrpstringError::NoData);
401503
}
402504

403505
// Unwrap ok since check_characters (in `UncheckedHrpstring::new`) checked the bech32-ness of this char.
404-
let witness_version = Fe32::from_char(unchecked.data[0].into()).unwrap();
506+
let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
405507
if witness_version.to_u8() > 16 {
406508
return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version));
407509
}
@@ -429,9 +531,10 @@ impl<'s> SegwitHrpstring<'s> {
429531
#[inline]
430532
pub fn new_bech32(s: &'s str) -> Result<Self, SegwitHrpstringError> {
431533
let unchecked = UncheckedHrpstring::new(s)?;
534+
let data_part = unchecked.data_part_ascii();
432535

433536
// Unwrap ok since check_characters (in `UncheckedHrpstring::new`) checked the bech32-ness of this char.
434-
let witness_version = Fe32::from_char(unchecked.data[0].into()).unwrap();
537+
let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
435538
if witness_version.to_u8() > 16 {
436539
return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version));
437540
}
@@ -456,6 +559,25 @@ impl<'s> SegwitHrpstring<'s> {
456559
#[inline]
457560
pub fn witness_version(&self) -> Fe32 { self.witness_version }
458561

562+
/// Returns a partial slice of the data part, as ASCII bytes, everything after the witness
563+
/// version and before the checksum.
564+
///
565+
/// The byte values are guaranteed to be valid bech32 characters.
566+
///
567+
/// # Examples
568+
///
569+
/// ```
570+
/// use bech32::{Bech32, primitives::decode::SegwitHrpstring};
571+
///
572+
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
573+
/// let ascii = "ar0srrr7xfkvy5l643lydnw9re59gtzz";
574+
///
575+
/// let segwit = SegwitHrpstring::new(&addr).unwrap();
576+
/// assert!(segwit.data_part_ascii_no_witver_no_checksum().iter().eq(ascii.as_bytes().iter()))
577+
/// ```
578+
#[inline]
579+
pub fn data_part_ascii_no_witver_no_checksum(&self) -> &[u8] { self.ascii }
580+
459581
/// Returns an iterator that yields the data part, excluding the witness version, of the parsed
460582
/// bech32 encoded string.
461583
///
@@ -465,12 +587,12 @@ impl<'s> SegwitHrpstring<'s> {
465587
/// Use `self.witness_version()` to get the witness version.
466588
#[inline]
467589
pub fn byte_iter(&self) -> ByteIter {
468-
ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() }
590+
ByteIter { iter: AsciiToFe32Iter { iter: self.ascii.iter().copied() }.fes_to_bytes() }
469591
}
470592
}
471593

472-
/// Checks whether a given HRP string has data characters in the bech32 alphabet (incl. checksum
473-
/// characters), and that the whole string has consistent casing (hrp, data, and checksum).
594+
/// Checks whether a given HRP string has data part characters in the bech32 alphabet (incl.
595+
/// checksum characters), and that the whole string has consistent casing (hrp and data part).
474596
///
475597
/// # Returns
476598
///

0 commit comments

Comments
 (0)