Commit 98cff69

Move copy of undef_mask into allocation

This also means that the compressed representation chosen may be optimized together with any changes to the undef_mask.

1 parent 2228b3f commit 98cff69

2 files changed: +88 -55 lines changed


src/librustc/mir/interpret/allocation.rs

Lines changed: 85 additions & 0 deletions
@@ -566,6 +566,91 @@ impl<'tcx, Tag, Extra> Allocation<Tag, Extra> {
     }
 }
 
+/// Run-length encoding of the undef mask.
+/// Used to copy parts of a mask multiple times to another allocation.
+pub struct AllocationDefinedness {
+    ranges: smallvec::SmallVec::<[u64; 1]>,
+    first: bool,
+}
+
+/// Transferring the definedness mask to other allocations.
+impl<Tag, Extra> Allocation<Tag, Extra> {
+    /// Creates a run-length encoding of the undef_mask.
+    pub fn compress_defined_range(
+        &self,
+        src: Pointer<Tag>,
+        size: Size,
+    ) -> AllocationDefinedness {
+        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
+        // a naive undef mask copying algorithm would repeatedly have to read the undef mask from
+        // the source and write it to the destination. Even if we optimized the memory accesses,
+        // we'd be doing all of this `repeat` times.
+        // Therefore we precompute a compressed version of the undef mask of the source value and
+        // then write it back `repeat` times without computing any more information from the source.
+
+        // a precomputed cache for ranges of defined/undefined bits
+        // 0000010010001110 will become
+        // [5, 1, 2, 1, 3, 3, 1]
+        // where each element toggles the state
+
+        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
+        let first = self.undef_mask.get(src.offset);
+        let mut cur_len = 1;
+        let mut cur = first;
+
+        for i in 1..size.bytes() {
+            // FIXME: optimize to bitshift the current undef block's bits and read the top bit
+            if self.undef_mask.get(src.offset + Size::from_bytes(i)) == cur {
+                cur_len += 1;
+            } else {
+                ranges.push(cur_len);
+                cur_len = 1;
+                cur = !cur;
+            }
+        }
+
+        ranges.push(cur_len);
+
+        AllocationDefinedness { ranges, first }
+    }
+
+    /// Apply multiple instances of the run-length encoding to the undef_mask.
+    pub fn mark_compressed_range(
+        &mut self,
+        defined: &AllocationDefinedness,
+        dest: Pointer<Tag>,
+        size: Size,
+        repeat: u64,
+    ) {
+        // an optimization where we can just overwrite an entire range of definedness bits if
+        // they are going to be uniformly `1` or `0`.
+        if defined.ranges.len() <= 1 {
+            self.undef_mask.set_range_inbounds(
+                dest.offset,
+                dest.offset + size * repeat,
+                defined.first,
+            );
+            return;
+        }
+
+        for mut j in 0..repeat {
+            j *= size.bytes();
+            j += dest.offset.bytes();
+            let mut cur = defined.first;
+            for range in &defined.ranges {
+                let old_j = j;
+                j += range;
+                self.undef_mask.set_range_inbounds(
+                    Size::from_bytes(old_j),
+                    Size::from_bytes(j),
+                    cur,
+                );
+                cur = !cur;
+            }
+        }
+    }
+}
+
 /// Relocations
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, RustcEncodable, RustcDecodable)]
 pub struct Relocations<Tag=(), Id=AllocId>(SortedMap<Size, (Tag, Id)>);
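
To see the run-length scheme in isolation, here is a minimal standalone sketch. A `Vec<bool>` stands in for the real `UndefMask` bitset, and the helper names `compress` and `expand` are illustrative assumptions, not part of this commit:

// Run-length encode a definedness mask: `first` is the state of bit 0, and
// each entry of `ranges` is the length of a run; each run toggles the state.
// Assumes a non-empty mask, as the real code does (it reads `src.offset`).
fn compress(mask: &[bool]) -> (bool, Vec<u64>) {
    let first = mask[0];
    let mut ranges = Vec::new();
    let mut cur = first;
    let mut cur_len = 1u64;
    for &bit in &mask[1..] {
        if bit == cur {
            cur_len += 1;
        } else {
            ranges.push(cur_len);
            cur_len = 1;
            cur = !cur;
        }
    }
    // Remember to fill in the trailing run.
    ranges.push(cur_len);
    (first, ranges)
}

// Write the encoded mask back `repeat` times, starting at `dest[0]`.
fn expand(dest: &mut [bool], first: bool, ranges: &[u64], repeat: u64) {
    let size: u64 = ranges.iter().sum();
    for i in 0..repeat {
        let mut j = (i * size) as usize;
        let mut cur = first;
        for &range in ranges {
            for bit in &mut dest[j..j + range as usize] {
                *bit = cur;
            }
            j += range as usize;
            cur = !cur;
        }
    }
}

fn main() {
    // The example from the comments: 0000010010001110 becomes [5, 1, 2, 1, 3, 3, 1].
    let mask: Vec<bool> = "0000010010001110".chars().map(|c| c == '1').collect();
    let (first, ranges) = compress(&mask);
    assert!(!first);
    assert_eq!(ranges, [5, 1, 2, 1, 3, 3, 1]);

    // Expanding with repeat = 2 writes the mask back-to-back, mirroring what
    // `mark_compressed_range` does for the `dest + i * size` copies.
    let mut dest = vec![false; mask.len() * 2];
    expand(&mut dest, first, &ranges, 2);
    assert_eq!(&dest[..16], &mask[..]);
    assert_eq!(&dest[16..], &mask[..]);
}

Storing only the initial bit plus run lengths also keeps the common case small: a fully defined or fully undefined source compresses to a single `u64`, which fits the inline capacity of the `SmallVec<[u64; 1]>` without heap allocation.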

src/librustc_mir/interpret/memory.rs

Lines changed: 3 additions & 55 deletions
@@ -894,65 +894,13 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
         // The bits have to be saved locally before writing to dest in case src and dest overlap.
         assert_eq!(size.bytes() as usize as u64, size.bytes());
 
-        let undef_mask = &self.get(src.alloc_id)?.undef_mask;
-
-        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
-        // a naive undef mask copying algorithm would repeatedly have to read the undef mask from
-        // the source and write it to the destination. Even if we optimized the memory accesses,
-        // we'd be doing all of this `repeat` times.
-        // Therefor we precompute a compressed version of the undef mask of the source value and
-        // then write it back `repeat` times without computing any more information from the source.
-
-        // a precomputed cache for ranges of defined/undefined bits
-        // 0000010010001110 will become
-        // [5, 1, 2, 1, 3, 3, 1]
-        // where each element toggles the state
-        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
-        let first = undef_mask.get(src.offset);
-        let mut cur_len = 1;
-        let mut cur = first;
-        for i in 1..size.bytes() {
-            // FIXME: optimize to bitshift the current undef block's bits and read the top bit
-            if undef_mask.get(src.offset + Size::from_bytes(i)) == cur {
-                cur_len += 1;
-            } else {
-                ranges.push(cur_len);
-                cur_len = 1;
-                cur = !cur;
-            }
-        }
+        let src_alloc = self.get(src.alloc_id)?;
+        let compressed = src_alloc.compress_defined_range(src, size);
 
         // now fill in all the data
         let dest_allocation = self.get_mut(dest.alloc_id)?;
-        // an optimization where we can just overwrite an entire range of definedness bits if
-        // they are going to be uniformly `1` or `0`.
-        if ranges.is_empty() {
-            dest_allocation.undef_mask.set_range_inbounds(
-                dest.offset,
-                dest.offset + size * repeat,
-                first,
-            );
-            return Ok(())
-        }
+        dest_allocation.mark_compressed_range(&compressed, dest, size, repeat);
 
-        // remember to fill in the trailing bits
-        ranges.push(cur_len);
-
-        for mut j in 0..repeat {
-            j *= size.bytes();
-            j += dest.offset.bytes();
-            let mut cur = first;
-            for range in &ranges {
-                let old_j = j;
-                j += range;
-                dest_allocation.undef_mask.set_range_inbounds(
-                    Size::from_bytes(old_j),
-                    Size::from_bytes(j),
-                    cur,
-                );
-                cur = !cur;
-            }
-        }
         Ok(())
     }

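The `defined.ranges.len() <= 1` fast path in the new `mark_compressed_range` falls out of this representation: a uniform source mask compresses to a single run, so all `repeat` copies can be written with one `set_range_inbounds` call instead of a per-copy loop. In terms of the hypothetical `compress` sketch from the first file:

// A fully defined 8-byte mask compresses to a single run.
let uniform = vec![true; 8];
let (first, ranges) = compress(&uniform);
assert!(first);
assert_eq!(ranges, [8]);
// With ranges.len() == 1, mark_compressed_range covers
// dest.offset .. dest.offset + size * repeat in one write of `first`.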