
Commit 4419621

fix: address missing functionality in binary matching/construction

1 parent cca9c6f

File tree

8 files changed, +654 -159 lines changed

compiler/binary/src/bitvec.rs

Lines changed: 113 additions & 8 deletions
@@ -2,6 +2,7 @@ use alloc::alloc::{Allocator, Global};
 use alloc::vec::Vec;
 use core::fmt;
 use core::hash::{Hash, Hasher};
+use core::iter::Extend;
 use core::mem;
 
 use num_bigint::BigInt;
@@ -106,6 +107,12 @@ impl<A: Allocator> BitVec<A> {
     pub fn matcher<'a>(&'a self) -> Matcher<'a> {
         Matcher::new(self.select())
     }
+
+    /// Returns the available capacity in bytes of the underlying buffer
+    #[inline]
+    fn bytes_available(&self) -> usize {
+        self.data.capacity() - self.byte_size()
+    }
 }
 
 impl<A: Allocator> Bitstring for BitVec<A> {
@@ -431,11 +438,15 @@ impl<A: Allocator> BitVec<A> {
 
     #[inline]
     fn reserve(&mut self, cap: usize) {
-        if (self.pos + cap + 1) >= self.data.capacity() {
-            self.data.reserve(cap + mem::size_of::<usize>());
-            unsafe {
-                self.data.set_len(self.data.capacity());
-            }
+        let available = self.bytes_available();
+        if available >= cap {
+            return;
+        }
+        // Buffer some additional capacity above and beyond
+        let cap = mem::size_of::<usize>() + cap;
+        self.data.reserve(cap - available);
+        unsafe {
+            self.data.set_len(self.data.capacity());
         }
     }
 
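For clarity, a standalone walkthrough of the rewritten arithmetic (plain usize values stand in for the BitVec fields; the word-size pad is 8 bytes on a 64-bit target):

fn main() {
    let capacity = 16usize;  // stand-in for self.data.capacity()
    let byte_size = 10usize; // stand-in for self.byte_size()
    let available = capacity - byte_size; // bytes_available() == 6
    let requested = 8usize;  // the `cap` argument
    assert!(available < requested); // the early return does not fire; we must grow

    // Pad the request by one word, then reserve only the shortfall
    let padded = core::mem::size_of::<usize>() + requested; // 8 + 8 = 16 on 64-bit
    assert_eq!(padded - available, 10); // additional bytes passed to Vec::reserve
}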
@@ -581,6 +592,14 @@ impl<A: Allocator> BitVec<A> {
             self.bit_offset = size - offset_shift;
         }
     }
+
+    /// This is the fallback implementation for `extend` for cases where we don't
+    /// know the length of the iterator
+    fn default_extend<I: Iterator<Item = u8>>(&mut self, iter: I) {
+        for byte in iter {
+            self.push_byte(byte);
+        }
+    }
 }
 impl<A: Allocator> Eq for BitVec<A> {}
 impl<A: Allocator, T: ?Sized + Bitstring> PartialEq<T> for BitVec<A> {
@@ -692,6 +711,92 @@ impl<A: Allocator> std::io::Write for BitVec<A> {
         Ok(())
     }
 }
+impl Extend<u8> for BitVec {
+    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
+        <Self as SpecExtend<T::IntoIter>>::spec_extend(self, iter.into_iter())
+    }
+
+    #[inline]
+    fn extend_one(&mut self, byte: u8) {
+        self.push_byte(byte);
+    }
+
+    #[inline]
+    fn extend_reserve(&mut self, additional: usize) {
+        self.reserve(additional)
+    }
+}
+
+trait SpecExtend<I> {
+    fn spec_extend(&mut self, iter: I);
+}
+impl<I: Iterator<Item = u8>> SpecExtend<I> for BitVec {
+    default fn spec_extend(&mut self, iter: I) {
+        self.default_extend(iter)
+    }
+}
+impl<'a> SpecExtend<ByteIter<'a>> for BitVec {
+    fn spec_extend(&mut self, iter: ByteIter<'a>) {
+        match iter.as_slice() {
+            Some(bytes) => self.push_bytes(bytes),
+            None => {
+                self.reserve(iter.len());
+                if self.bit_offset == 0 {
+                    for byte in iter {
+                        unsafe {
+                            self.push_byte_fast(byte);
+                        }
+                    }
+                } else {
+                    for byte in iter {
+                        unsafe {
+                            self.push_byte_slow(byte);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+impl<'a> SpecExtend<BitsIter<'a>> for BitVec {
+    fn spec_extend(&mut self, mut iter: BitsIter<'a>) {
+        match iter.as_slice() {
+            Some(bytes) => self.push_bytes(bytes),
+            None => {
+                self.reserve(iter.byte_size());
+                if self.bit_offset == 0 {
+                    loop {
+                        match iter.next() {
+                            Some(byte) => unsafe { self.push_byte_fast(byte) },
+                            None => {
+                                if let Some(b) = iter.consume() {
+                                    unsafe {
+                                        self.push_partial_byte(b.byte(), b.size);
+                                    }
+                                }
+                                break;
+                            }
+                        }
+                    }
+                } else {
+                    loop {
+                        match iter.next() {
+                            Some(byte) => unsafe { self.push_byte_slow(byte) },
+                            None => {
+                                if let Some(b) = iter.consume() {
+                                    unsafe {
+                                        self.push_partial_byte(b.byte(), b.size);
+                                    }
+                                }
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
 
 #[cfg(test)]
 mod test {
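The SpecExtend plumbing above mirrors the pattern std uses for Vec: a blanket `default fn` covers arbitrary iterators, and concrete impls override it with a bulk path. A minimal standalone sketch of that dispatch (toy `Buf` type, nightly `min_specialization`; none of these names come from this crate):

#![feature(min_specialization)]

struct Buf {
    data: Vec<u8>,
}

trait SpecExtend<I> {
    fn spec_extend(&mut self, iter: I);
}

// Fallback: push one byte at a time for any iterator of bytes
impl<I: Iterator<Item = u8>> SpecExtend<I> for Buf {
    default fn spec_extend(&mut self, iter: I) {
        for byte in iter {
            self.data.push(byte);
        }
    }
}

// Specialization: vec::IntoIter exposes its remaining bytes as a slice,
// so they can be appended in one bulk copy
impl SpecExtend<std::vec::IntoIter<u8>> for Buf {
    fn spec_extend(&mut self, iter: std::vec::IntoIter<u8>) {
        self.data.extend_from_slice(iter.as_slice());
    }
}

fn main() {
    let mut buf = Buf { data: Vec::new() };
    buf.spec_extend(vec![1u8, 2, 3].into_iter()); // bulk path
    buf.spec_extend((0u8..3).map(|b| b + 10));    // per-byte fallback
    assert_eq!(buf.data, [1, 2, 3, 10, 11, 12]);
}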
@@ -899,9 +1004,9 @@ mod test {
         vec.clear();
 
         vec.push_utf16('〦', Endianness::Native);
-        assert_eq!(vec.byte_size(), 4);
-        assert_eq!(vec.bit_size(), 32);
-        assert_eq!(vec.pos, 4);
+        assert_eq!(vec.byte_size(), 2);
+        assert_eq!(vec.bit_size(), 16);
+        assert_eq!(vec.pos, 2);
         assert_eq!(vec.bit_offset, 0);
 
         vec.clear();
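The corrected assertions follow from how UTF-16 encodes this code point: '〦' is U+3026, inside the Basic Multilingual Plane, so it takes a single 16-bit code unit (2 bytes, 16 bits), not the two units the old assertions implied. A quick standalone check with std:

fn main() {
    assert_eq!('〦' as u32, 0x3026);
    let mut units = [0u16; 2];
    let encoded = '〦'.encode_utf16(&mut units);
    assert_eq!(encoded.len(), 1); // one code unit => 2 bytes / 16 bits
    assert_eq!(encoded[0], 0x3026);
}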

compiler/binary/src/iter.rs

Lines changed: 92 additions & 1 deletion
@@ -1,12 +1,13 @@
 use core::iter;
 
-use crate::{Bitstring, Selection};
+use crate::{Bitstring, MaybePartialByte, Selection};
 
 /// Represents iteration over the bytes in a selection, which may constitute
 /// either a binary or bitstring.
 ///
 /// Iteration may produce a trailing partial byte, of which all unused bits will
 /// be zeroed.
+#[derive(Debug)]
 pub struct ByteIter<'a> {
     selection: Selection<'a>,
 }
@@ -28,6 +29,13 @@ impl<'a> ByteIter<'a> {
             selection: Selection::all(data),
         }
     }
+
+    /// In some cases, the underlying bytes can be directly accessed allowing for
+    /// more optimal access patterns, see SpecExtend impl for BitVec for an example
+    #[inline]
+    pub fn as_slice(&self) -> Option<&[u8]> {
+        self.selection.as_bytes()
+    }
 }
 impl<'a> Iterator for ByteIter<'a> {
     type Item = u8;
@@ -57,6 +65,89 @@ impl<'a> iter::ExactSizeIterator for ByteIter<'a> {
 impl<'a> iter::FusedIterator for ByteIter<'a> {}
 unsafe impl<'a> iter::TrustedLen for ByteIter<'a> {}
 
+/// Like `ByteIter`, but intended for cases where special care must be
+/// taken around a trailing partial byte, if one is present. This iterator
+/// works like `ByteIter` until it encounters a trailing partial byte, in which
+/// case it will NOT emit the final byte at all, and instead it must be requested
+/// explicitly from the iterator and handled manually.
+#[derive(Debug)]
+pub struct BitsIter<'a> {
+    selection: Selection<'a>,
+}
+impl<'a> Clone for BitsIter<'a> {
+    #[inline]
+    fn clone(&self) -> Self {
+        Self {
+            selection: self.selection,
+        }
+    }
+}
+impl<'a> BitsIter<'a> {
+    pub fn new(selection: Selection<'a>) -> Self {
+        Self { selection }
+    }
+
+    /// Returns the size in bytes (including trailing partial byte) of the underlying selection
+    #[inline]
+    pub fn byte_size(&self) -> usize {
+        self.selection.byte_size()
+    }
+
+    /// Takes the selection from this iterator, consuming it
+    ///
+    /// This is how users of this iterator must consume the final trailing byte.
+    #[inline]
+    pub fn consume(self) -> Option<MaybePartialByte> {
+        match self.selection {
+            Selection::Byte(b) if b.is_partial() => Some(b),
+            _ => None,
+        }
+    }
+
+    /// In some cases, the underlying bytes can be directly accessed allowing for
+    /// more optimal access patterns, see SpecExtend impl for BitVec for an example
+    #[inline]
+    pub fn as_slice(&self) -> Option<&[u8]> {
+        self.selection.as_bytes()
+    }
+}
+impl<'a> Iterator for BitsIter<'a> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match &self.selection {
+            Selection::Empty => None,
+            Selection::Byte(b) if b.is_partial() => None,
+            _ => self.selection.pop(),
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.len();
+        (len, Some(len))
+    }
+
+    fn count(self) -> usize {
+        self.len()
+    }
+}
+impl<'a> iter::ExactSizeIterator for BitsIter<'a> {
+    fn len(&self) -> usize {
+        let size = self.selection.byte_size();
+        match self.selection.trailing_bits() {
+            0 => size,
+            _ if size == 0 => 0,
+            _ => size - 1,
+        }
+    }
+
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+impl<'a> iter::FusedIterator for BitsIter<'a> {}
+unsafe impl<'a> iter::TrustedLen for BitsIter<'a> {}
+
 #[cfg(test)]
 mod tests {
     use super::*;
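The iterate-then-consume contract of BitsIter can be illustrated with a standalone analogue (hypothetical names, not this crate's Selection machinery): complete bytes come out of ordinary iteration, while a trailing partial byte must be requested explicitly.

struct PartialTail<'a> {
    bytes: &'a [u8],
    trailing_bits: u8, // valid bits in the last byte; 0 means no partial byte
}

impl<'a> PartialTail<'a> {
    // Yield only the complete bytes, like BitsIter::next
    fn complete_bytes(&self) -> impl Iterator<Item = u8> + 'a {
        let bytes: &'a [u8] = self.bytes;
        let n = if self.trailing_bits == 0 {
            bytes.len()
        } else {
            bytes.len() - 1
        };
        bytes[..n].iter().copied()
    }

    // Hand over the trailing partial byte with unused bits zeroed, like consume()
    fn consume(self) -> Option<(u8, u8)> {
        if self.trailing_bits == 0 {
            return None;
        }
        let last = *self.bytes.last()?;
        let mask = !0u8 << (8 - self.trailing_bits);
        Some((last & mask, self.trailing_bits))
    }
}

fn main() {
    // Two complete bytes plus three valid trailing bits
    let tail = PartialTail { bytes: &[0xAB, 0xCD, 0b1011_0110], trailing_bits: 3 };
    assert_eq!(tail.complete_bytes().collect::<Vec<_>>(), [0xAB, 0xCD]);
    assert_eq!(tail.consume(), Some((0b1010_0000, 3)));
}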

compiler/binary/src/lib.rs

Lines changed: 7 additions & 5 deletions
@@ -8,6 +8,8 @@
 #![feature(const_option_ext)]
 #![feature(slice_take)]
 #![feature(arbitrary_enum_discriminant)]
+#![feature(min_specialization)]
+#![feature(extend_one)]
 
 extern crate alloc;
 #[cfg(any(test, feature = "std"))]
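Both new feature gates back the Extend work in bitvec.rs: `min_specialization` enables the `default fn spec_extend` impls, and `extend_one` unlocks the unstable single-element Extend hooks that BitVec overrides. A minimal nightly sketch of the latter using Vec rather than BitVec:

#![feature(extend_one)]

fn main() {
    let mut v: Vec<u8> = Vec::new();
    v.extend_reserve(2); // capacity hint for upcoming one-element extends
    v.extend_one(1);
    v.extend_one(2);
    assert_eq!(v, [1, 2]);
}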
@@ -28,7 +30,7 @@ mod traits;
 
 pub use self::bitvec::BitVec;
 pub use self::flags::{BinaryFlags, Encoding};
-pub use self::iter::ByteIter;
+pub use self::iter::{BitsIter, ByteIter};
 pub use self::matcher::Matcher;
 pub use self::select::{MaybePartialByte, Selection};
 pub use self::spec::BinaryEntrySpecifier;
@@ -53,10 +55,10 @@ pub use self::traits::{Aligned, Binary, Bitstring, FromEndianBytes, ToEndianBytes};
 ///
 /// So lets apply that to an example, a 16-bit integer 64542, as viewed on a little-endian machine:
 ///
-/// * 0xfc1e (little-endian hex)
-/// * 0x1efc (big-endian hex)
-/// * 0b1111110000011110 (little-endian binary)
-/// * 0b0001111011111100 (big-endian binary)
+/// * 0xfc1e (little-endian hex)
+/// * 0x1efc (big-endian hex)
+/// * 0b1111110000011110 (little-endian binary)
+/// * 0b0001111011111100 (big-endian binary)
 ///
 /// Well that's confusing, The bytes appear to be backwards! The little-endian version has the most-significant bits in the least-significant
 /// byte, and the big-endian version has the least-significant bits in the most-significant byte. What's going on here?
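The doc comment's 64542 example can be sanity-checked with std's endian helpers; a small standalone check (not part of the diff):

fn main() {
    let n: u16 = 64542; // 0xFC1E
    assert_eq!(n.to_be_bytes(), [0xFC, 0x1E]); // big-endian: most-significant byte first
    assert_eq!(n.to_le_bytes(), [0x1E, 0xFC]); // little-endian: least-significant byte first
    assert_eq!(format!("{n:016b}"), "1111110000011110");
}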
