@@ -2,6 +2,7 @@ use std::borrow::Cow;
2
2
use std:: env;
3
3
use std:: fmt;
4
4
use std:: fmt:: { Debug , Formatter } ;
5
+ use std:: ops:: Range ;
5
6
use std:: sync:: atomic:: { AtomicBool , Ordering } ;
6
7
7
8
use once_cell:: sync:: Lazy ;
@@ -787,7 +788,7 @@ fn str_width(s: &str) -> usize {
787
788
}
788
789
789
790
#[ cfg( feature = "ansi-parsing" ) ]
790
- pub ( crate ) fn char_width ( c : char ) -> usize {
791
+ fn char_width ( c : char ) -> usize {
791
792
#[ cfg( feature = "unicode-width" ) ]
792
793
{
793
794
use unicode_width:: UnicodeWidthChar ;
@@ -805,80 +806,98 @@ pub(crate) fn char_width(_c: char) -> usize {
805
806
1
806
807
}
807
808
808
/// Slice a `&str` in terms of text width. Only the text columns from
/// `bounds.start` (inclusive) up to `bounds.end` (exclusive) are kept.
///
/// If a multi-column character overlaps either end of the interval it is not
/// included. In such a case, the result will be less than
/// `bounds.end - bounds.start` columns wide. Bounds reaching past the end of
/// the text are clamped to the text.
///
/// This ensures that escape codes are not screwed up in the process: ANSI
/// codes found outside of the interval are kept and emitted around the slice.
/// If non-empty `head` and `tail` are specified, they are inserted between
/// those leftover ANSI codes and the slice, i.e. the result is laid out as
/// `<leading codes><head><slice><tail><trailing codes>`.
///
/// Without the `ansi-parsing` feature no escape codes or character widths are
/// interpreted: `bounds` is then treated as a byte range into `s`, clamped to
/// the string and narrowed to the nearest character boundaries.
///
/// The input is returned borrowed whenever nothing has to be inserted around
/// the slice; otherwise a new `String` is allocated.
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
    #[cfg(feature = "ansi-parsing")]
    {
        let mut pos = 0;
        let mut slice = 0..0;

        // ANSI codes found outside of the slice
        let mut front_ansi = String::new();
        let mut back_ansi = String::new();

        // Iterate through each ANSI code or unicode character while keeping
        // track of:
        //  - pos: cumulated width of characters iterated so far
        //  - slice: byte offsets of the part of the string for which `pos`
        //    was inside bounds
        for (sub, is_ansi) in AnsiCodeIterator::new(s) {
            if is_ansi {
                if pos < bounds.start {
                    // An ANSI code before the interval: keep for later and
                    // move the (still empty) slice past it
                    front_ansi.push_str(sub);
                    slice.start += sub.len();
                    slice.end = slice.start;
                } else if pos <= bounds.end {
                    // An ANSI code inside of the interval: extend the slice
                    slice.end += sub.len();
                } else {
                    // An ANSI code after the interval: keep for later
                    back_ansi.push_str(sub);
                }
            } else {
                for c in sub.chars() {
                    let c_width = char_width(c);

                    if pos < bounds.start {
                        // The char starts before the interval: move the
                        // (still empty) slice past it
                        slice.start += c.len_utf8();
                        slice.end = slice.start;
                    } else if pos + c_width <= bounds.end {
                        // The char fits into the interval: extend the slice
                        slice.end += c.len_utf8();
                    }

                    // The width is accounted for even when the char is
                    // dropped, so nothing after an overflowing char can be
                    // appended to the slice by mistake.
                    pos += c_width;
                }
            }
        }

        let slice = &s[slice];

        if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
        }
    }
    #[cfg(not(feature = "ansi-parsing"))]
    {
        // Without ANSI parsing the bounds are plain byte offsets into `s`.
        // Clamp them to the string and narrow them to char boundaries so that
        // an oversized or misaligned range keeps the part that does fit
        // instead of collapsing to an empty string.
        let len = s.len();

        // A char overlapping the start is dropped: snap forward.
        // `len` is always a boundary, so this terminates.
        let mut start = bounds.start.min(len);
        while !s.is_char_boundary(start) {
            start += 1;
        }

        // A char overlapping the end is dropped: snap backward.
        // `0` is always a boundary, so this terminates.
        let mut end = bounds.end.min(len);
        while !s.is_char_boundary(end) {
            end -= 1;
        }

        // `get` yields `None` for an inverted range, which maps to "".
        let slice = s.get(start..end).unwrap_or("");

        if head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(format!("{head}{slice}{tail}"))
        }
    }
}
881
885
886
+ /// Truncates a string to a certain number of characters.
887
+ ///
888
+ /// This ensures that escape codes are not screwed up in the process.
889
+ /// If the maximum length is hit the string will be truncated but
890
+ /// escapes code will still be honored. If truncation takes place
891
+ /// the tail string will be appended.
892
+ pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
893
+ if measure_text_width ( s) > width {
894
+ let tail_width = measure_text_width ( tail) ;
895
+ slice_str ( s, "" , 0 ..width. saturating_sub ( tail_width) , tail)
896
+ } else {
897
+ Cow :: Borrowed ( s)
898
+ }
899
+ }
900
+
882
901
/// Pads a string to fill a certain number of characters.
883
902
///
884
903
/// This will honor ansi codes correctly and allows you to align a string
@@ -987,8 +1006,50 @@ fn test_truncate_str() {
987
1006
) ;
988
1007
}
989
1008
1009
/// Exercises `slice_str` on a string mixing plain text, ANSI escape codes and
/// double-width characters.
#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";

    // A range covering the whole string must return it unchanged, whatever
    // features are enabled.
    assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);

    // The column-based expectations below only hold when both escape codes
    // and character widths are interpreted.
    if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
        assert_eq!(measure_text_width(test_str), 16);

        // Empty interval: only the escape codes survive
        assert_eq!(
            slice_str(test_str, "", 5..5, ""),
            "\x1b[31m\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..5, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        // The first 🐶 overlaps the end of the interval and is dropped
        assert_eq!(
            slice_str(test_str, "", 0..6, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..7, ""),
            "Hello\x1b[31m🐶\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 4..9, ""),
            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
        );

        // An end bound past the text width is clamped to the text
        assert_eq!(
            slice_str(test_str, "", 7..21, ""),
            "\x1b[31m\x1b[1m🐶\x1b[0m world!"
        );
    }
}
1049
+
990
1050
#[ test]
991
1051
fn test_truncate_str_no_ansi ( ) {
1052
+ assert_eq ! ( & truncate_str( "foo bar" , 7 , "!" ) , "foo bar" ) ;
992
1053
assert_eq ! ( & truncate_str( "foo bar" , 5 , "" ) , "foo b" ) ;
993
1054
assert_eq ! ( & truncate_str( "foo bar" , 5 , "!" ) , "foo !" ) ;
994
1055
assert_eq ! ( & truncate_str( "foo bar baz" , 10 , "..." ) , "foo bar..." ) ;
0 commit comments