|
13 | 13 | // limitations under the License.
|
14 | 14 |
|
/// Return the right bound of the prefix, so that `p..right` will cover all strings with prefix `p`.
///
/// The bound is built by dropping every trailing `\u{10FFFF}` (the largest `char`, which has
/// no successor) and then replacing the last remaining character with the next Unicode
/// scalar value. Rust orders strings by their UTF-8 bytes, which agrees with scalar value
/// order, so the result is the smallest string greater than every string starting with `p`.
///
/// If the right bound can not be built, return None. This happens only when `p` is empty or
/// consists solely of `\u{10FFFF}` characters.
pub fn prefix_right_bound(p: &str) -> Option<String> {
    // Trailing U+10FFFF characters cannot be incremented, so the "carry" moves to the
    // nearest character before them.
    let significant = p.trim_end_matches(char::MAX);

    // `None` here means `p` was empty or made up entirely of U+10FFFF.
    let last = significant.chars().next_back()?;

    let successor = match last {
        // U+D800..=U+DFFF are surrogates and not valid `char`s; the scalar value that
        // follows U+D7FF is U+E000.
        '\u{D7FF}' => '\u{E000}',
        // `last` is neither U+10FFFF nor U+D7FF, so `last + 1` is always a valid scalar value.
        other => char::from_u32(other as u32 + 1)?,
    };

    // Everything before the incremented character is kept unchanged.
    let head = &significant[..significant.len() - last.len_utf8()];

    let mut bound = String::with_capacity(head.len() + successor.len_utf8());
    bound.push_str(head);
    bound.push(successor);
    Some(bound)
}
|
23 | 40 |
|
24 | 41 | #[cfg(test)]
|
25 | 42 | mod tests {
|
26 | 43 | use super::prefix_right_bound;
|
27 | 44 |
|
#[test]
fn test_prefix_right_bound_last_unicode() {
    // A prefix made only of the highest Unicode character has no right bound.
    let unbounded = ["\u{10FFFF}", "\u{10FFFF}\u{10FFFF}"];
    for prefix in unbounded.iter() {
        assert_eq!(prefix_right_bound(prefix), None);
    }

    // Trailing \u{10FFFF} characters are dropped and the character before them is bumped.
    let bounded = [
        ("a\u{10FFFF}", "b"),
        ("a\u{10FFFF}\u{10FFFF}", "b"),
        ("aa\u{10FFFF}", "ab"),
        ("aa\u{10FFFF}\u{10FFFF}", "ab"),
        ("aa\u{10FFFF}\u{10FFFF}\u{10FFFF}", "ab"),
    ];
    for (prefix, expected) in bounded.iter() {
        assert_eq!(prefix_right_bound(prefix), Some(s(expected)));
    }
}
| 59 | + |
#[test]
fn test_next_string() {
    // Each pair is (prefix, expected right bound): only the last character is incremented.
    let cases = [("a", "b"), ("z", "{"), ("foo/", "foo0"), ("foo💯", "foo💰")];

    for (prefix, expected) in cases.iter() {
        assert_eq!(Some(s(expected)), prefix_right_bound(prefix));
    }
}
|
36 | 67 |
|
#[test]
fn test_prefix_right_bound_basic() {
    // Plain ASCII prefix: the final 'c' becomes 'd'.
    let expected = Some(s("abd"));
    let actual = prefix_right_bound("abc");
    assert_eq!(actual, expected);
}
| 73 | + |
#[test]
fn test_prefix_right_bound_empty() {
    // An empty prefix has no character to increment, so there is no bound.
    let actual = prefix_right_bound("");
    assert_eq!(actual, None);
}
| 79 | + |
#[test]
fn test_prefix_right_bound_unicode() {
    // A multi-byte character is incremented to the next one as a whole.
    let expected = Some(s("😁"));
    let actual = prefix_right_bound("😀");
    assert_eq!(actual, expected);
}
| 85 | + |
#[test]
fn test_prefix_right_bound_increment() {
    // The last character moves to the next Unicode character;
    // note that 'z' + 1 is '{' in ASCII.
    let cases = [("a", "b"), ("z", "{")];

    for (prefix, expected) in cases.iter() {
        assert_eq!(prefix_right_bound(prefix), Some(s(expected)));
    }
}
| 92 | + |
#[test]
fn test_prefix_right_bound_non_ascii() {
    // 'ñ' is U+00F1, so the bound ends with U+00F2.
    let expected = Some(s("\u{00f2}"));
    let actual = prefix_right_bound("ñ");
    assert_eq!(actual, expected);
}
| 98 | + |
#[test]
fn test_prefix_right_bound_complex_string() {
    // Trailing punctuation is incremented like any other character: '!' becomes '"'.
    let expected = Some(s("hello\""));
    let actual = prefix_right_bound("hello!");
    assert_eq!(actual, expected);
}
| 104 | + |
/// Test shorthand: turn any `ToString` value into an owned `String`.
fn s(s: impl ToString) -> String {
    ToString::to_string(&s)
}
|
|
0 commit comments