Add new function ansi::slice_ansi_str

remi-dupre · remi-dupre · commit 047fea2d8de2 · 2024-02-07T18:28:08.000+01:00
I also took the opportunity to suggest a non-allocating version of
measure_text_width.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "console"
 description = "A terminal and console abstraction for Rust"
-version = "0.15.8"
+version = "0.16.0"
 keywords = ["cli", "terminal", "colors", "console", "ansi"]
 authors = ["Armin Ronacher <armin.ronacher@active-4.com>"]
 license = "MIT"
diff --git a/src/ansi.rs b/src/ansi.rs
@@ -4,6 +4,8 @@ use std::{
     str::CharIndices,
 };
 
+use crate::utils::char_width;
+
 #[derive(Debug, Clone, Copy)]
 enum State {
     Start,
@@ -267,8 +269,63 @@ impl<'a> Iterator for AnsiCodeIterator<'a> {
 
 impl<'a> FusedIterator for AnsiCodeIterator<'a> {}
 
+/// Slice a `&str` by text width, keeping only the columns in `start..end`.
+/// ANSI codes seen before column `start` are dropped; later ones are kept.
+///
+/// If a multi-column character straddles `start` or `end` it will not be
+/// included. In such a case, the result will be less than `end - start`
+/// columns wide. Returns `""` when `end <= start`.
+pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str {
+    if end <= start {
+        return "";
+    }
+
+    let mut pos = 0;
+    let mut res_start = 0;
+    let mut res_end = 0;
+
+    'outer: for (sub, is_ansi) in AnsiCodeIterator::new(s) {
+        // As ANSI codes have a width of 0 we can safely early-interrupt
+        // the outer for loop only if the current pos is strictly greater
+        // than `end`.
+        if pos > end {
+            break;
+        }
+
+        if is_ansi {
+            if pos < start {
+                res_start += sub.len();
+                res_end = res_start;
+            } else if pos <= end {
+                res_end += sub.len();
+            } else {
+                break 'outer;
+            }
+        } else {
+            for c in sub.chars() {
+                let c_width = char_width(c);
+
+                if pos < start {
+                    res_start += c.len_utf8();
+                    res_end = res_start;
+                } else if pos + c_width <= end {
+                    res_end += c.len_utf8();
+                } else {
+                    break 'outer;
+                }
+
+                pos += char_width(c);
+            }
+        }
+    }
+
+    &s[res_start..res_end]
+}
+
 #[cfg(test)]
 mod tests {
+    use crate::measure_text_width;
+
     use super::*;
 
     use lazy_static::lazy_static;
@@ -435,4 +492,37 @@ mod tests {
         assert_eq!(iter.rest_slice(), "");
         assert_eq!(iter.next(), None);
     }
+
+    #[test]
+    fn test_slice_ansi_str() {
+        // Note that 🐶 is two columns wide with the unicode-width feature, one column without it
+        let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
+        assert_eq!(slice_ansi_str(test_str, 5, 5), "");
+        assert_eq!(slice_ansi_str(test_str, 0, test_str.len()), test_str);
+
+        if cfg!(feature = "unicode-width") {
+            assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
+            assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
+            assert_eq!(measure_text_width(test_str), 16);
+            assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
+            assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
+            assert_eq!(slice_ansi_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m");
+            assert_eq!(slice_ansi_str(test_str, 7, 21), "\x1b[1m🐶\x1b[0m world!");
+            assert_eq!(slice_ansi_str(test_str, 8, 21), "\x1b[0m world!");
+            assert_eq!(slice_ansi_str(test_str, 9, 21), "\x1b[0m world!");
+
+            assert_eq!(
+                slice_ansi_str(test_str, 4, 9),
+                "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
+            );
+        } else {
+            assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
+            assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m🐶\u{1b}[1m");
+
+            assert_eq!(
+                slice_ansi_str(test_str, 4, 9),
+                "o\x1b[31m🐶\x1b[1m🐶\x1b[0m w"
+            );
+        }
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
@@ -87,7 +87,7 @@ pub use crate::utils::{
 };
 
 #[cfg(feature = "ansi-parsing")]
-pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator};
+pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator};
 
 mod common_term;
 mod kb;
diff --git a/src/utils.rs b/src/utils.rs
@@ -9,7 +9,7 @@ use lazy_static::lazy_static;
 use crate::term::{wants_emoji, Term};
 
 #[cfg(feature = "ansi-parsing")]
-use crate::ansi::{strip_ansi_codes, AnsiCodeIterator};
+use crate::ansi::AnsiCodeIterator;
 
 #[cfg(not(feature = "ansi-parsing"))]
 fn strip_ansi_codes(s: &str) -> &str {
@@ -71,7 +71,17 @@ pub fn set_colors_enabled_stderr(val: bool) {
 
 /// Measure the width of a string in terminal characters.
 pub fn measure_text_width(s: &str) -> usize {
-    str_width(&strip_ansi_codes(s))
+    #[cfg(feature = "ansi-parsing")]
+    {
+        AnsiCodeIterator::new(s)
+            .filter(|(_, is_ansi)| !is_ansi)
+            .map(|(sub, _)| str_width(sub))
+            .sum()
+    }
+    #[cfg(not(feature = "ansi-parsing"))]
+    {
+        str_width(s)
+    }
 }
 
 /// A terminal color.
@@ -719,7 +729,7 @@ fn str_width(s: &str) -> usize {
 }
 
 #[cfg(feature = "ansi-parsing")]
-fn char_width(c: char) -> usize {
+pub(crate) fn char_width(c: char) -> usize {
     #[cfg(feature = "unicode-width")]
     {
         use unicode_width::UnicodeWidthChar;
@@ -868,15 +878,18 @@ fn test_text_width() {
         .on_black()
         .bold()
         .force_styling(true)
-        .to_string();
+        .to_string()
+        + "🐶bar";
     assert_eq!(
         measure_text_width(&s),
-        if cfg!(feature = "ansi-parsing") {
-            3
-        } else if cfg!(feature = "unicode-width") {
-            17
-        } else {
-            21
+        match (
+            cfg!(feature = "ansi-parsing"),
+            cfg!(feature = "unicode-width")
+        ) {
+            (true, true) => 8,
+            (true, false) => 7,
+            (false, true) => 22,
+            (false, false) => 25,
         }
     );
 }