Skip to content

Commit b0e6b61

Browse files
committed
Make slice_str similar to truncate_str
1 parent d8bb2fc commit b0e6b61

File tree

3 files changed

+120
-149
lines changed

3 files changed

+120
-149
lines changed

src/ansi.rs

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ use std::{
44
str::CharIndices,
55
};
66

7-
use crate::utils::char_width;
8-
97
#[derive(Debug, Clone, Copy)]
108
enum State {
119
Start,
@@ -269,63 +267,8 @@ impl<'a> Iterator for AnsiCodeIterator<'a> {
269267

270268
impl FusedIterator for AnsiCodeIterator<'_> {}
271269

272-
/// Slice a `&str` in terms of text width. This means that only the text
273-
/// columns strictly between `start` and `stop` will be kept.
274-
///
275-
/// If a multi-columns character overlaps with the end of the interval it will
276-
/// not be included. In such a case, the result will be less than `end - start`
277-
/// columns wide.
278-
pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str {
279-
if end <= start {
280-
return "";
281-
}
282-
283-
let mut pos = 0;
284-
let mut res_start = 0;
285-
let mut res_end = 0;
286-
287-
'outer: for (sub, is_ansi) in AnsiCodeIterator::new(s) {
288-
// As ansi symbols have a width of 0 we can safely early-interupt
289-
// the outer for loop only if current pos strictly greater than
290-
// `end`.
291-
if pos > end {
292-
break;
293-
}
294-
295-
if is_ansi {
296-
if pos < start {
297-
res_start += sub.len();
298-
res_end = res_start;
299-
} else if pos <= end {
300-
res_end += sub.len();
301-
} else {
302-
break 'outer;
303-
}
304-
} else {
305-
for c in sub.chars() {
306-
let c_width = char_width(c);
307-
308-
if pos < start {
309-
res_start += c.len_utf8();
310-
res_end = res_start;
311-
} else if pos + c_width <= end {
312-
res_end += c.len_utf8();
313-
} else {
314-
break 'outer;
315-
}
316-
317-
pos += char_width(c);
318-
}
319-
}
320-
}
321-
322-
&s[res_start..res_end]
323-
}
324-
325270
#[cfg(test)]
326271
mod tests {
327-
use crate::measure_text_width;
328-
329272
use super::*;
330273

331274
use once_cell::sync::Lazy;
@@ -492,37 +435,4 @@ mod tests {
492435
assert_eq!(iter.rest_slice(), "");
493436
assert_eq!(iter.next(), None);
494437
}
495-
496-
#[test]
497-
fn test_slice_ansi_str() {
498-
// Note that 🐶 is two columns wide
499-
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
500-
assert_eq!(slice_ansi_str(test_str, 5, 5), "");
501-
assert_eq!(slice_ansi_str(test_str, 0, test_str.len()), test_str);
502-
503-
if cfg!(feature = "unicode-width") {
504-
assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
505-
assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
506-
assert_eq!(measure_text_width(test_str), 16);
507-
assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
508-
assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
509-
assert_eq!(slice_ansi_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m");
510-
assert_eq!(slice_ansi_str(test_str, 7, 21), "\x1b[1m🐶\x1b[0m world!");
511-
assert_eq!(slice_ansi_str(test_str, 8, 21), "\x1b[0m world!");
512-
assert_eq!(slice_ansi_str(test_str, 9, 21), "\x1b[0m world!");
513-
514-
assert_eq!(
515-
slice_ansi_str(test_str, 4, 9),
516-
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
517-
);
518-
} else {
519-
assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
520-
assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m🐶\u{1b}[1m");
521-
522-
assert_eq!(
523-
slice_ansi_str(test_str, 4, 9),
524-
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m w"
525-
);
526-
}
527-
}
528438
}

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,12 @@ pub use crate::term::{
8484
};
8585
pub use crate::utils::{
8686
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
87-
set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
88-
Color, Emoji, Style, StyledObject,
87+
set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
88+
Attribute, Color, Emoji, Style, StyledObject,
8989
};
9090

9191
#[cfg(feature = "ansi-parsing")]
92-
pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator};
92+
pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator};
9393

9494
mod common_term;
9595
mod kb;

src/utils.rs

Lines changed: 117 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::borrow::Cow;
22
use std::env;
33
use std::fmt;
44
use std::fmt::{Debug, Formatter};
5+
use std::ops::Range;
56
use std::sync::atomic::{AtomicBool, Ordering};
67

78
use once_cell::sync::Lazy;
@@ -787,7 +788,7 @@ fn str_width(s: &str) -> usize {
787788
}
788789

789790
#[cfg(feature = "ansi-parsing")]
790-
pub(crate) fn char_width(c: char) -> usize {
791+
fn char_width(c: char) -> usize {
791792
#[cfg(feature = "unicode-width")]
792793
{
793794
use unicode_width::UnicodeWidthChar;
@@ -805,80 +806,98 @@ pub(crate) fn char_width(_c: char) -> usize {
805806
1
806807
}
807808

808-
/// Truncates a string to a certain number of characters.
809+
/// Slice a `&str` in terms of text width. This means that only the text
810+
/// columns in the half-open range `bounds.start..bounds.end` will be kept.
809811
///
810-
/// This ensures that escape codes are not screwed up in the process.
811-
/// If the maximum length is hit the string will be truncated but
812-
/// escapes code will still be honored. If truncation takes place
813-
/// the tail string will be appended.
814-
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
812+
/// If a multi-column character overlaps with the end of the interval it will
813+
/// not be included. In such a case, the result will be less than `bounds.end - bounds.start`
814+
/// columns wide.
815+
///
816+
/// This ensures that escape codes are not screwed up in the process. And if
817+
/// non-empty head and tail are specified, they are inserted between the ANSI
818+
/// symbols from truncated bounds and the slice.
819+
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
815820
#[cfg(feature = "ansi-parsing")]
816821
{
817-
use std::cmp::Ordering;
818-
let mut iter = AnsiCodeIterator::new(s);
819-
let mut length = 0;
820-
let mut rv = None;
821-
822-
while let Some(item) = iter.next() {
823-
match item {
824-
(s, false) => {
825-
if rv.is_none() {
826-
if str_width(s) + length > width - str_width(tail) {
827-
let ts = iter.current_slice();
828-
829-
let mut s_byte = 0;
830-
let mut s_width = 0;
831-
let rest_width = width - str_width(tail) - length;
832-
for c in s.chars() {
833-
s_byte += c.len_utf8();
834-
s_width += char_width(c);
835-
match s_width.cmp(&rest_width) {
836-
Ordering::Equal => break,
837-
Ordering::Greater => {
838-
s_byte -= c.len_utf8();
839-
break;
840-
}
841-
Ordering::Less => continue,
842-
}
843-
}
844-
845-
let idx = ts.len() - s.len() + s_byte;
846-
let mut buf = ts[..idx].to_string();
847-
buf.push_str(tail);
848-
rv = Some(buf);
849-
}
850-
length += str_width(s);
851-
}
822+
let mut pos = 0;
823+
let mut slice = 0..0;
824+
825+
// ANSI symbols outside of the slice
826+
let mut front_ansi = String::new();
827+
let mut back_ansi = String::new();
828+
829+
// Iterate through each ANSI symbol or unicode character while keeping
830+
// track of:
831+
// - pos: cumulated width of characters iterated so far
832+
// - slice: byte indices of the part of the string for which `pos`
833+
// was inside bounds
834+
for (sub, is_ansi) in AnsiCodeIterator::new(s) {
835+
if is_ansi {
836+
if pos < bounds.start {
837+
// An ANSI symbol before the interval: keep for later
838+
front_ansi.push_str(sub);
839+
slice.start += sub.len();
840+
slice.end = slice.start;
841+
} else if pos <= bounds.end {
842+
// An ANSI symbol inside of the interval: extend the slice
843+
slice.end += sub.len();
844+
} else {
845+
// An ANSI symbol after the interval: keep for later
846+
back_ansi.push_str(sub);
852847
}
853-
(s, true) => {
854-
if let Some(ref mut rv) = rv {
855-
rv.push_str(s);
848+
} else {
849+
for c in sub.chars() {
850+
let c_width = char_width(c);
851+
852+
if pos < bounds.start {
853+
// The char is before the interval: move the slice back
854+
slice.start += c.len_utf8();
855+
slice.end = slice.start;
856+
} else if pos + c_width <= bounds.end {
857+
// The char fits into the interval: extend the slice
858+
slice.end += c.len_utf8();
856859
}
860+
861+
pos += c_width;
857862
}
858863
}
859864
}
860865

861-
if let Some(buf) = rv {
862-
Cow::Owned(buf)
866+
let slice = &s[slice];
867+
868+
if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
869+
Cow::Borrowed(slice)
863870
} else {
864-
Cow::Borrowed(s)
871+
Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
865872
}
866873
}
867-
868874
#[cfg(not(feature = "ansi-parsing"))]
869875
{
870-
if s.len() <= width - tail.len() {
871-
Cow::Borrowed(s)
876+
let slice = s.get(bounds).unwrap_or("");
877+
878+
if head.is_empty() && tail.is_empty() {
879+
Cow::Borrowed(slice)
872880
} else {
873-
Cow::Owned(format!(
874-
"{}{}",
875-
s.get(..width - tail.len()).unwrap_or_default(),
876-
tail
877-
))
881+
Cow::Owned(format!("{head}{slice}{tail}"))
878882
}
879883
}
880884
}
881885

886+
/// Truncates a string to a certain number of characters.
887+
///
888+
/// This ensures that escape codes are not screwed up in the process.
889+
/// If the maximum length is hit the string will be truncated but
890+
/// escape codes will still be honored. If truncation takes place
891+
/// the tail string will be appended.
892+
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
893+
if measure_text_width(s) > width {
894+
let tail_width = measure_text_width(tail);
895+
slice_str(s, "", 0..width.saturating_sub(tail_width), tail)
896+
} else {
897+
Cow::Borrowed(s)
898+
}
899+
}
900+
882901
/// Pads a string to fill a certain number of characters.
883902
///
884903
/// This will honor ansi codes correctly and allows you to align a string
@@ -987,8 +1006,50 @@ fn test_truncate_str() {
9871006
);
9881007
}
9891008

1009+
#[test]
1010+
fn test_slice_ansi_str() {
1011+
// Note that 🐶 is two columns wide
1012+
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
1013+
assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);
1014+
1015+
if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
1016+
assert_eq!(measure_text_width(test_str), 16);
1017+
1018+
assert_eq!(
1019+
slice_str(test_str, "", 5..5, ""),
1020+
"\u{1b}[31m\u{1b}[1m\u{1b}[0m"
1021+
);
1022+
1023+
assert_eq!(
1024+
slice_str(test_str, "", 0..5, ""),
1025+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1026+
);
1027+
1028+
assert_eq!(
1029+
slice_str(test_str, "", 0..6, ""),
1030+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1031+
);
1032+
1033+
assert_eq!(
1034+
slice_str(test_str, "", 0..7, ""),
1035+
"Hello\x1b[31m🐶\x1b[1m\x1b[0m"
1036+
);
1037+
1038+
assert_eq!(
1039+
slice_str(test_str, "", 4..9, ""),
1040+
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
1041+
);
1042+
1043+
assert_eq!(
1044+
slice_str(test_str, "", 7..21, ""),
1045+
"\x1b[31m\x1b[1m🐶\x1b[0m world!"
1046+
);
1047+
}
1048+
}
1049+
9901050
#[test]
9911051
fn test_truncate_str_no_ansi() {
1052+
assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");
9921053
assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");
9931054
assert_eq!(&truncate_str("foo bar", 5, "!"), "foo !");
9941055
assert_eq!(&truncate_str("foo bar baz", 10, "..."), "foo bar...");

0 commit comments

Comments
 (0)