Skip to content

Commit f763534

Browse files
committed
More verbose but more readable implementation of slice_str
This new implementation also has the benefit of allocating at most once.
1 parent b0e6b61 commit f763534

File tree

1 file changed

+61
-41
lines changed

1 file changed

+61
-41
lines changed

src/utils.rs

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -815,61 +815,81 @@ pub(crate) fn char_width(_c: char) -> usize {
815815
///
816816
/// This ensures that escape codes are not screwed up in the process. And if
817817
/// non-empty head and tail are specified, they are inserted between the ANSI
818-
/// symbols from truncated bounds and the slice.
818+
/// codes from truncated bounds and the slice.
819819
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
820820
#[cfg(feature = "ansi-parsing")]
821821
{
822822
let mut pos = 0;
823-
let mut slice = 0..0;
823+
let mut code_iter = AnsiCodeIterator::new(s).peekable();
824824

825-
// ANSI symbols outside of the slice
825+
// Search for the beginning of the slice while collecting leading ANSI
826+
// codes
827+
let mut slice_start = 0;
826828
let mut front_ansi = String::new();
827-
let mut back_ansi = String::new();
828-
829-
// Iterate through each ANSI symbol or unicode character while keeping
830-
// track of:
831-
// - pos: cumulated width of characters iterated so far
832-
// - slice: char indices of the part of the string for which `pos`
833-
// was inside bounds
834-
for (sub, is_ansi) in AnsiCodeIterator::new(s) {
829+
830+
while pos < bounds.start {
831+
let Some((sub, is_ansi)) = code_iter.peek_mut() else {
832+
break;
833+
};
834+
835+
if *is_ansi {
836+
front_ansi.push_str(sub);
837+
slice_start += sub.len();
838+
} else if let Some(c) = sub.chars().next() {
839+
// Pop the head char of `sub` while keeping `sub` on top of
840+
// the iterator
841+
pos += char_width(c);
842+
slice_start += c.len_utf8();
843+
*sub = &sub[c.len_utf8()..];
844+
continue;
845+
}
846+
847+
code_iter.next();
848+
}
849+
850+
// Search for the end of the slice
851+
let mut slice_end = slice_start;
852+
853+
'search_slice_end: for (sub, is_ansi) in &mut code_iter {
835854
if is_ansi {
836-
if pos < bounds.start {
837-
// An ANSI symbol before the interval: keep for later
838-
front_ansi.push_str(sub);
839-
slice.start += sub.len();
840-
slice.end = slice.start;
841-
} else if pos <= bounds.end {
842-
// An ANSI symbol inside of the interval: extend the slice
843-
slice.end += sub.len();
844-
} else {
845-
// An ANSI symbol after the interval: keep for later
846-
back_ansi.push_str(sub);
847-
}
848-
} else {
849-
for c in sub.chars() {
850-
let c_width = char_width(c);
851-
852-
if pos < bounds.start {
853-
// The char is before the interval: move the slice back
854-
slice.start += c.len_utf8();
855-
slice.end = slice.start;
856-
} else if pos + c_width <= bounds.end {
857-
// The char fits into the interval: extend the slice
858-
slice.end += c.len_utf8();
859-
}
855+
slice_end += sub.len();
856+
continue;
857+
}
860858

861-
pos += c_width;
859+
for c in sub.chars() {
860+
let c_width = char_width(c);
861+
862+
if pos + c_width > bounds.end {
863+
// We will only search for ANSI codes after breaking this
864+
// loop, so we can safely drop the remainder of `sub`
865+
break 'search_slice_end;
862866
}
867+
868+
pos += c_width;
869+
slice_end += c.len_utf8();
863870
}
864871
}
865872

866-
let slice = &s[slice];
873+
// Initialise the result, no allocation may have to be performed if
874+
// `front_ansi`, `head` and `tail` are all empty
875+
let slice = &s[slice_start..slice_end];
867876

868-
if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
869-
Cow::Borrowed(slice)
870-
} else {
871-
Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
877+
let mut result = {
878+
if front_ansi.is_empty() && head.is_empty() && tail.is_empty() {
879+
Cow::Borrowed(slice)
880+
} else {
881+
Cow::Owned(front_ansi + head + slice + tail)
882+
}
883+
};
884+
885+
// Push back remaining ANSI codes to result
886+
for (sub, is_ansi) in code_iter {
887+
if is_ansi {
888+
*result.to_mut() += sub;
889+
}
872890
}
891+
892+
result
873893
}
874894
#[cfg(not(feature = "ansi-parsing"))]
875895
{

0 commit comments

Comments
 (0)