@@ -31,9 +31,19 @@ pub(crate) struct Utf16Char {
31
31
}
32
32
33
33
impl Utf16Char {
34
+ /// Returns the length in 8-bit UTF-8 code units.
34
35
fn len ( & self ) -> TextSize {
35
36
self . end - self . start
36
37
}
38
+
39
+ /// Returns the length in 16-bit UTF-16 code units.
40
+ fn len_utf16 ( & self ) -> usize {
41
+ if self . len ( ) == TextSize :: from ( 4 ) {
42
+ 2
43
+ } else {
44
+ 1
45
+ }
46
+ }
37
47
}
38
48
39
49
impl LineIndex {
@@ -110,7 +120,7 @@ impl LineIndex {
110
120
if let Some ( utf16_chars) = self . utf16_lines . get ( & line) {
111
121
for c in utf16_chars {
112
122
if c. end <= col {
113
- res -= usize:: from ( c. len ( ) ) - 1 ;
123
+ res -= usize:: from ( c. len ( ) ) - c . len_utf16 ( ) ;
114
124
} else {
115
125
// From here on, all utf16 characters come *after* the character we are mapping,
116
126
// so we don't need to take them into account
@@ -125,7 +135,7 @@ impl LineIndex {
125
135
if let Some ( utf16_chars) = self . utf16_lines . get ( & line) {
126
136
for c in utf16_chars {
127
137
if col > u32:: from ( c. start ) {
128
- col += u32:: from ( c. len ( ) ) - 1 ;
138
+ col += u32:: from ( c. len ( ) ) - c . len_utf16 ( ) as u32 ;
129
139
} else {
130
140
// From here on, all utf16 characters come *after* the character we are mapping,
131
141
// so we don't need to take them into account
@@ -204,6 +214,9 @@ const C: char = 'メ';
204
214
205
215
// UTF-16 to UTF-8
206
216
assert_eq ! ( col_index. utf16_to_utf8_col( 1 , 19 ) , TextSize :: from( 21 ) ) ;
217
+
218
+ let col_index = LineIndex :: new ( "a𐐏b" ) ;
219
+ assert_eq ! ( col_index. utf16_to_utf8_col( 0 , 3 ) , TextSize :: from( 5 ) ) ;
207
220
}
208
221
209
222
#[ test]
0 commit comments