From 68db62fe1794089bbc6eb1adcbdd13c0b8b872a9 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 15:23:54 -0600 Subject: [PATCH 01/14] Add SplitRInclusive which is like SplitInclusive but included on the following match --- library/core/src/str/iter.rs | 128 +++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 94cb81e9d41a1..dc0570b195f1b 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -634,6 +634,26 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { None => self.get_end(), } } + + #[inline] + fn next_rinclusive(&mut self) -> Option<&'a str> { + if self.finished { + return None; + } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match() { + // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries, + // and self.start is either the start of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. + Some((a, _)) => unsafe { + let elt = haystack.get_unchecked(self.start..a); + self.start = a; + Some(elt) + }, + None => self.get_end(), + } + } #[inline] fn next_back(&mut self) -> Option<&'a str> @@ -714,6 +734,49 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { }, } } + + #[inline] + fn next_back_rinclusive(&mut self) -> Option<&'a str> + where + P::Searcher: ReverseSearcher<'a>, + { + if self.finished { + return None; + } + + if !self.allow_trailing_empty { + self.allow_trailing_empty = true; + match self.next_back_rinclusive() { + Some(elt) if !elt.is_empty() => return Some(elt), + _ => { + if self.finished { + return None; + } + } + } + } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match_back() { + // SAFETY: `Searcher` guarantees that `a` lies on unicode boundary, + // and self.end is either the end of the original string, + // or `a` was assigned to it, so it also lies on unicode boundary. 
+ Some((a, _)) => unsafe { + let elt = haystack.get_unchecked(a..self.end); + self.end = a; + Some(elt) + }, + // SAFETY: self.start is either the start of the original string, + // or start of a substring that represents the part of the string that hasn't + // iterated yet. Either way, it is guaranteed to lie on unicode boundary. + // self.end is either the end of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. + None => unsafe { + self.finished = true; + Some(haystack.get_unchecked(self.start..self.end)) + }, + } + } #[inline] fn as_str(&self) -> &'a str { @@ -1376,6 +1439,71 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> { } } +/// An iterator over the substrings of a string, +/// new substrings beginning when matching to a predicate function. +/// Unlike `Split`, it contains the matched part as the start +/// of each subslice - besides the first, which is the contents +/// up until the first match. +/// +/// This struct is created by the [`split_rinclusive`] method on [`str`]. +/// See its documentation for more. 
+/// +/// [`split_rinclusive`]: str::split_rinclusive +#[stable(feature = "split_rinclusive", since = "1.51.0")] +#[derive(Clone)] +pub struct SplitRInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>); + +#[stable(feature = "split_rinclusive", since = "1.51.0")] +impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.0.next_rinclusive() + } +} + +#[stable(feature = "split_rinclusive", since = "1.51.0")] +impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitRInclusive").field("0", &self.0).finish() + } +} + +#[stable(feature = "split_rinclusive", since = "1.51.0")] +impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator + for SplitRInclusive<'a, P> +{ + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + self.0.next_back_rinclusive() + } +} + +#[stable(feature = "split_rinclusive", since = "1.51.0")] +impl<'a, P: Pattern<'a>> FusedIterator for SplitRInclusive<'a, P> {} + +impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> { + /// Returns remainder of the splitted string + /// + /// # Examples + /// + /// ``` + /// #![feature(str_split_rinclusive_as_str)] + /// let mut split = "Mary had a little lamb".split_rinclusive(' '); + /// assert_eq!(split.as_str(), "Mary had a little lamb"); + /// split.next(); + /// assert_eq!(split.as_str(), " had a little lamb"); + /// split.by_ref().for_each(drop); + /// assert_eq!(split.as_str(), ""); + /// ``` + #[inline] + #[unstable(feature = "str_split_rinclusive_as_str", issue = "77998")] + pub fn as_str(&self) -> &'a str { + self.0.as_str() + } +} + /// An iterator of [`u16`] over the string encoded as UTF-16. /// /// This struct is created by the [`encode_utf16`] method on [`str`]. 
From 2fb77650ed17dbe169e40fb9f9265899e92729e2 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 15:43:41 -0600 Subject: [PATCH 02/14] Add split_rinclusive method to str --- library/core/src/str/mod.rs | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 607a0179ff4b9..50ecde0ed6802 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -68,6 +68,9 @@ pub use iter::SplitAsciiWhitespace; #[stable(feature = "split_inclusive", since = "1.51.0")] pub use iter::SplitInclusive; +#[stable(feature = "split_rinclusive", since = "1.51.0")] +pub use iter::SplitRInclusive; + #[unstable(feature = "str_internals", issue = "none")] pub use validations::{next_code_point, utf8_char_width}; @@ -1273,6 +1276,55 @@ impl str { finished: false, }) } + + + /// An iterator over substrings of this string slice, separated by + /// characters matched by a pattern. Differs from the iterator produced by + /// `split` in that `split_rinclusive` leaves the matched part as the + /// beginning of the next substring, except possibly the first which is whatever before the first match. + /// + /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a + /// function or closure that determines if a character matches. + /// + /// [`char`]: prim@char + /// [pattern]: self::pattern + /// + /// # Examples + /// + /// ``` + /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb." + /// .split_rinclusive('\n').collect(); + /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb."]); + /// ``` + /// + /// If the first element of the string is matched, + /// the first substring will be an empty string. 
+ /// + /// ``` + /// let v: Vec<&str> = "MaryHadALittleLamb" + /// .split_rinclusive(char::is_uppercase).collect(); + /// assert_eq!(v, ["", "Mary", "Had", "A", "Little", "Lamb]); + /// ``` + /// + /// If the last element of the string is matched, + /// that element will be considered the final substring returned by the iterator. + /// + /// ``` + /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n" + /// .split_rinclusive('\n').collect(); + /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb.", "\n"]); + /// ``` + #[stable(feature = "split_rinclusive", since = "1.51.0")] + #[inline] + pub fn split_rinclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitRInclusive<'a, P> { + SplitRInclusive(SplitInternal { + start: 0, + end: self.len(), + matcher: pat.into_searcher(self), + allow_trailing_empty: false, + finished: false, + }) + } /// An iterator over substrings of the given string slice, separated by /// characters matched by a pattern and yielded in reverse order. 
From 875ca41d98d0c048237f1a1a2ad5ebca0b7bb603 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 15:54:24 -0600 Subject: [PATCH 03/14] Add some tests for `str.split_rinclusive(...)` --- library/alloc/tests/str.rs | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index d3a87c056cfb1..6eee2f56c7544 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1325,6 +1325,26 @@ fn test_split_char_iterator_inclusive() { assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]); } + +#[test] +fn test_split_char_iterator_rinclusive() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_rinclusive('\n').collect(); + assert_eq!(split, ["", "\nMäry häd ä little lämb", "\nLittle lämb", "\n"]); + + let uppercase_separated = "SheepSharkTurtleCat"; + let mut first_char = true; + let split: Vec<&str> = uppercase_separated + .split_rinclusive(|c: char| { + let split = !first_char && c.is_uppercase(); + first_char = split; + split + }) + .collect(); + assert_eq!(split, ["Sheep", "Shark", "Turtle", "Cat"]); +} + #[test] fn test_split_char_iterator_inclusive_rev() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; @@ -1349,6 +1369,30 @@ fn test_split_char_iterator_inclusive_rev() { assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]); } +#[test] +fn test_split_char_iterator_rinclusive_rev() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_rinclusive('\n').rev().collect(); + assert_eq!(split, ["\n", "\nLittle lämb", "\nMäry häd ä little lämb", ""]); + + // Note that the predicate is stateful and thus dependent + // on the iteration order. + // (A different predicate is needed for reverse iterator vs normal iterator.) + // Not sure if anything can be done though. 
+ let uppercase_separated = "SheepSharkTurtleCat"; + let mut term_char = true; + let split: Vec<&str> = uppercase_separated + .split_inclusive(|c: char| { + let split = term_char && c.is_uppercase(); + term_char = c.is_uppercase(); + split + }) + .rev() + .collect(); + assert_eq!(split, ["Cat", "Turtle", "Shark", "Sheep", ""]); +} + #[test] fn test_rsplit() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; From 06fe4175b0a1f3533ac8b3ab6e150b46823b5534 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 15:55:35 -0600 Subject: [PATCH 04/14] forgot to update a call to `split_rinclusive` --- library/alloc/tests/str.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index 6eee2f56c7544..23dcced8657a6 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1383,7 +1383,7 @@ fn test_split_char_iterator_rinclusive_rev() { let uppercase_separated = "SheepSharkTurtleCat"; let mut term_char = true; let split: Vec<&str> = uppercase_separated - .split_inclusive(|c: char| { + .split_rinclusive(|c: char| { let split = term_char && c.is_uppercase(); term_char = c.is_uppercase(); split From ca480df2473c3712b829891e4701793854f17118 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 16:26:56 -0600 Subject: [PATCH 05/14] Mark split_rinclusive unstable, not stable, in core/src/str/iter.rs --- library/core/src/str/iter.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index dc0570b195f1b..41651b664f1e1 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1449,11 +1449,11 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> { /// See its documentation for more. 
/// /// [`split_rinclusive`]: str::split_rinclusive -#[stable(feature = "split_rinclusive", since = "1.51.0")] +#[unstable(feature = "split_rinclusive", issue = "none)] #[derive(Clone)] pub struct SplitRInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>); -#[stable(feature = "split_rinclusive", since = "1.51.0")] +#[unstable(feature = "split_rinclusive", issue = "none)] impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> { type Item = &'a str; @@ -1463,14 +1463,14 @@ impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> { } } -#[stable(feature = "split_rinclusive", since = "1.51.0")] +#[unstable(feature = "split_rinclusive", issue = "none)] impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a, P> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SplitRInclusive").field("0", &self.0).finish() } } -#[stable(feature = "split_rinclusive", since = "1.51.0")] +#[unstable(feature = "split_rinclusive", issue = "none)] impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator for SplitRInclusive<'a, P> { @@ -1480,7 +1480,7 @@ impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator } } -#[stable(feature = "split_rinclusive", since = "1.51.0")] +#[unstable(feature = "split_rinclusive", issue = "none)] impl<'a, P: Pattern<'a>> FusedIterator for SplitRInclusive<'a, P> {} impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> { @@ -1498,7 +1498,7 @@ impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> { /// assert_eq!(split.as_str(), ""); /// ``` #[inline] - #[unstable(feature = "str_split_rinclusive_as_str", issue = "77998")] + #[unstable(feature = "split_rinclusive", issue = "none)] pub fn as_str(&self) -> &'a str { self.0.as_str() } From 19dc3226fad01edd4739f1e29d3c1ca7f57874e3 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 16:28:14 -0600 Subject: [PATCH 06/14] Mark split_rinclusive unstable, not stable, in core/src/str/mod.rs --- 
library/core/src/str/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 50ecde0ed6802..14d38caf19a40 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -68,7 +68,7 @@ pub use iter::SplitAsciiWhitespace; #[stable(feature = "split_inclusive", since = "1.51.0")] pub use iter::SplitInclusive; -#[stable(feature = "split_rinclusive", since = "1.51.0")] +#[unstable(feature = "split_rinclusive", issue = "none)] pub use iter::SplitRInclusive; #[unstable(feature = "str_internals", issue = "none")] @@ -1314,7 +1314,7 @@ impl str { /// .split_rinclusive('\n').collect(); /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb.", "\n"]); /// ``` - #[stable(feature = "split_rinclusive", since = "1.51.0")] + #[unstable(feature = "split_rinclusive", issue = "none)] #[inline] pub fn split_rinclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitRInclusive<'a, P> { SplitRInclusive(SplitInternal { From 4192306443bdb5923a748f831dd07b09e03a4daa Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 16:43:58 -0600 Subject: [PATCH 07/14] fix find/replace fail --- library/core/src/str/iter.rs | 12 ++++++------ library/core/src/str/mod.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 41651b664f1e1..bb0ea69d5920c 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1449,11 +1449,11 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> { /// See its documentation for more. 
/// /// [`split_rinclusive`]: str::split_rinclusive -#[unstable(feature = "split_rinclusive", issue = "none)] +#[unstable(feature = "split_rinclusive", issue = "none")] #[derive(Clone)] pub struct SplitRInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>); -#[unstable(feature = "split_rinclusive", issue = "none)] +#[unstable(feature = "split_rinclusive", issue = "none")] impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> { type Item = &'a str; @@ -1463,14 +1463,14 @@ impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> { } } -#[unstable(feature = "split_rinclusive", issue = "none)] +#[unstable(feature = "split_rinclusive", issue = "none")] impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a, P> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SplitRInclusive").field("0", &self.0).finish() } } -#[unstable(feature = "split_rinclusive", issue = "none)] +#[unstable(feature = "split_rinclusive", issue = "none")] impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator for SplitRInclusive<'a, P> { @@ -1480,7 +1480,7 @@ impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator } } -#[unstable(feature = "split_rinclusive", issue = "none)] +#[unstable(feature = "split_rinclusive", issue = "none")] impl<'a, P: Pattern<'a>> FusedIterator for SplitRInclusive<'a, P> {} impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> { @@ -1498,7 +1498,7 @@ impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> { /// assert_eq!(split.as_str(), ""); /// ``` #[inline] - #[unstable(feature = "split_rinclusive", issue = "none)] + #[unstable(feature = "split_rinclusive", issue = "none")] pub fn as_str(&self) -> &'a str { self.0.as_str() } diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 14d38caf19a40..8e9d9ae3965b1 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -68,7 +68,7 @@ pub use iter::SplitAsciiWhitespace; #[stable(feature = 
"split_inclusive", since = "1.51.0")] pub use iter::SplitInclusive; -#[unstable(feature = "split_rinclusive", issue = "none)] +#[unstable(feature = "split_rinclusive", issue = "none")] pub use iter::SplitRInclusive; #[unstable(feature = "str_internals", issue = "none")] @@ -1314,7 +1314,7 @@ impl str { /// .split_rinclusive('\n').collect(); /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb.", "\n"]); /// ``` - #[unstable(feature = "split_rinclusive", issue = "none)] + #[unstable(feature = "split_rinclusive", issue = "none")] #[inline] pub fn split_rinclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitRInclusive<'a, P> { SplitRInclusive(SplitInternal { From 5b15afde42b269cfb69fd26ab45734d79b96edfa Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 17:02:02 -0600 Subject: [PATCH 08/14] Add `allow_leading_empty` support, remove leading empty for split_rinclusive --- library/alloc/tests/str.rs | 6 ++--- library/core/src/str/iter.rs | 44 ++++++++++++++++++++++++------------ library/core/src/str/mod.rs | 7 ++++-- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index 23dcced8657a6..690606907c520 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1331,7 +1331,7 @@ fn test_split_char_iterator_rinclusive() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; let split: Vec<&str> = data.split_rinclusive('\n').collect(); - assert_eq!(split, ["", "\nMäry häd ä little lämb", "\nLittle lämb", "\n"]); + assert_eq!(split, ["\nMäry häd ä little lämb", "\nLittle lämb", "\n"]); let uppercase_separated = "SheepSharkTurtleCat"; let mut first_char = true; @@ -1374,7 +1374,7 @@ fn test_split_char_iterator_rinclusive_rev() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; let split: Vec<&str> = data.split_rinclusive('\n').rev().collect(); - assert_eq!(split, ["\n", "\nLittle lämb", "\nMäry häd ä little lämb", ""]); + assert_eq!(split, 
["\n", "\nLittle lämb", "\nMäry häd ä little lämb"]); // Note that the predicate is stateful and thus dependent // on the iteration order. @@ -1390,7 +1390,7 @@ fn test_split_char_iterator_rinclusive_rev() { }) .rev() .collect(); - assert_eq!(split, ["Cat", "Turtle", "Shark", "Sheep", ""]); + assert_eq!(split, ["Cat", "Turtle", "Shark", "Sheep"]); } #[test] diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index bb0ea69d5920c..4c9c08eeeb131 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -564,6 +564,7 @@ pub(super) struct SplitInternal<'a, P: Pattern<'a>> { pub(super) end: usize, pub(super) matcher: P::Searcher, pub(super) allow_trailing_empty: bool, + pub(super) allow_leading_empty: bool, pub(super) finished: bool, } @@ -577,6 +578,7 @@ where .field("end", &self.end) .field("matcher", &self.matcher) .field("allow_trailing_empty", &self.allow_trailing_empty) + .field("allow_leading_empty", &self.allow_leading_empty) .field("finished", &self.finished) .finish() } @@ -603,6 +605,18 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { return None; } + if !self.allow_leading_empty { + self.allow_leading_empty = true; + match self.next() { + Some(elt) if !elt.is_empty() => return Some(elt), + _ => { + if self.finished { + return None; + } + } + } + } + let haystack = self.matcher.haystack(); match self.matcher.next_match() { // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries. 
@@ -641,11 +655,23 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { return None; } + if !self.allow_leading_empty { + self.allow_leading_empty = true; + match self.next_rinclusive() { + Some(elt) if !elt.is_empty() => return Some(elt), + _ => { + if self.finished { + return None; + } + } + } + } + let haystack = self.matcher.haystack(); match self.matcher.next_match() { - // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries, + // SAFETY: `Searcher` guarantees that `a` lies on unicode boundaries, // and self.start is either the start of the original string, - // or `b` was assigned to it, so it also lies on unicode boundary. + // or `a` was assigned to it, so it also lies on unicode boundary. Some((a, _)) => unsafe { let elt = haystack.get_unchecked(self.start..a); self.start = a; @@ -744,18 +770,6 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { return None; } - if !self.allow_trailing_empty { - self.allow_trailing_empty = true; - match self.next_back_rinclusive() { - Some(elt) if !elt.is_empty() => return Some(elt), - _ => { - if self.finished { - return None; - } - } - } - } - let haystack = self.matcher.haystack(); match self.matcher.next_match_back() { // SAFETY: `Searcher` guarantees that `a` lies on unicode boundary, @@ -770,7 +784,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { // or start of a substring that represents the part of the string that hasn't // iterated yet. Either way, it is guaranteed to lie on unicode boundary. // self.end is either the end of the original string, - // or `b` was assigned to it, so it also lies on unicode boundary. + // or `a` was assigned to it, so it also lies on unicode boundary. 
None => unsafe { self.finished = true; Some(haystack.get_unchecked(self.start..self.end)) diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 8e9d9ae3965b1..1fb0f8c145c4b 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -1233,6 +1233,7 @@ impl str { end: self.len(), matcher: pat.into_searcher(self), allow_trailing_empty: true, + allow_leading_empty: true, finished: false, }) } @@ -1273,6 +1274,7 @@ impl str { end: self.len(), matcher: pat.into_searcher(self), allow_trailing_empty: false, + allow_leading_empty: true, finished: false, }) } @@ -1298,12 +1300,12 @@ impl str { /// ``` /// /// If the first element of the string is matched, - /// the first substring will be an empty string. + /// the leading empty string is omitted. /// /// ``` /// let v: Vec<&str> = "MaryHadALittleLamb" /// .split_rinclusive(char::is_uppercase).collect(); - /// assert_eq!(v, ["", "Mary", "Had", "A", "Little", "Lamb]); + /// assert_eq!(v, ["Mary", "Had", "A", "Little", "Lamb"]); /// ``` /// /// If the last element of the string is matched, /// that element will be considered the final substring returned by the iterator. /// /// ``` /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n" @@ -1322,6 +1324,7 @@ impl str { end: self.len(), matcher: pat.into_searcher(self), allow_trailing_empty: false, + allow_leading_empty: false, finished: false, }) } From 1c2d3dd6e7a717eb82b93deae13d1ca12d362245 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 17:16:28 -0600 Subject: [PATCH 09/14] Add a clarification documentation line, clean up tests. 
--- library/alloc/tests/str.rs | 14 ++------------ library/core/src/str/mod.rs | 2 ++ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index 690606907c520..f0dc92fcf2e22 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1334,13 +1334,8 @@ fn test_split_char_iterator_rinclusive() { assert_eq!(split, ["\nMäry häd ä little lämb", "\nLittle lämb", "\n"]); let uppercase_separated = "SheepSharkTurtleCat"; - let mut first_char = true; let split: Vec<&str> = uppercase_separated - .split_rinclusive(|c: char| { - let split = !first_char && c.is_uppercase(); - first_char = split; - split - }) + .split_rinclusive(char::is_uppercase) .collect(); assert_eq!(split, ["Sheep", "Shark", "Turtle", "Cat"]); } @@ -1381,13 +1376,8 @@ fn test_split_char_iterator_rinclusive_rev() { // (A different predicate is needed for reverse iterator vs normal iterator.) // Not sure if anything can be done though. let uppercase_separated = "SheepSharkTurtleCat"; - let mut term_char = true; let split: Vec<&str> = uppercase_separated - .split_rinclusive(|c: char| { - let split = term_char && c.is_uppercase(); - term_char = c.is_uppercase(); - split - }) + .split_rinclusive(char::is_uppercase) .rev() .collect(); assert_eq!(split, ["Cat", "Turtle", "Shark", "Sheep"]); diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 1fb0f8c145c4b..db95d246717bc 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -1285,6 +1285,8 @@ impl str { /// `split` in that `split_rinclusive` leaves the matched part as the /// beginning of the next substring, except possibly the first which is whatever before the first match. /// + /// Put another way, a match is the start of a new substring. + /// /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a /// function or closure that determines if a character matches. 
/// From de9899860ebcdb0543904f903d1e1ecbc7e8a1f7 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 17:17:27 -0600 Subject: [PATCH 10/14] remove now-irrelevant comment --- library/alloc/tests/str.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index f0dc92fcf2e22..60218e3a50543 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1371,10 +1371,6 @@ fn test_split_char_iterator_rinclusive_rev() { let split: Vec<&str> = data.split_rinclusive('\n').rev().collect(); assert_eq!(split, ["\n", "\nLittle lämb", "\nMäry häd ä little lämb"]); - // Note that the predicate is stateful and thus dependent - // on the iteration order. - // (A different predicate is needed for reverse iterator vs normal iterator.) - // Not sure if anything can be done though. let uppercase_separated = "SheepSharkTurtleCat"; let split: Vec<&str> = uppercase_separated .split_rinclusive(char::is_uppercase) From 4f55bdfe5659ba73117534528bb56461f6e9991f Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 17:31:21 -0600 Subject: [PATCH 11/14] oops, derive clone doesn't work --- library/core/src/str/iter.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 4c9c08eeeb131..6531370e763c6 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1464,7 +1464,6 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> { /// /// [`split_rinclusive`]: str::split_rinclusive #[unstable(feature = "split_rinclusive", issue = "none")] -#[derive(Clone)] pub struct SplitRInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>); #[unstable(feature = "split_rinclusive", issue = "none")] @@ -1484,6 +1483,14 @@ impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a } } +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +#[unstable(feature = "split_rinclusive", 
issue = "none")] +impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitRInclusive<'a, P> { + fn clone(&self) -> Self { + SplitInclusive(self.0.clone()) + } +} + #[unstable(feature = "split_rinclusive", issue = "none")] impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator for SplitRInclusive<'a, P> From 6940fb64cfe9ea9324ac12a88cb545c974b93cbc Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 17:35:29 -0600 Subject: [PATCH 12/14] fix copy/paste fail --- library/core/src/str/iter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 6531370e763c6..0f65d027fee8c 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1487,7 +1487,7 @@ impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a #[unstable(feature = "split_rinclusive", issue = "none")] impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitRInclusive<'a, P> { fn clone(&self) -> Self { - SplitInclusive(self.0.clone()) + SplitRInclusive(self.0.clone()) } } From 1bf47532489a9a35368ae96f1e3acf691e8b7af1 Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 17:39:51 -0600 Subject: [PATCH 13/14] add feature(split_rinclusive) to testing crate --- library/alloc/tests/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs index 8c57c804ad2dc..5d518d515c915 100644 --- a/library/alloc/tests/lib.rs +++ b/library/alloc/tests/lib.rs @@ -25,6 +25,7 @@ #![feature(const_btree_new)] #![feature(const_default_impls)] #![feature(const_trait_impl)] +#![feature(split_rinclusive)] use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; From 452942d0b87182b1e112662816ed3da4e30d39bf Mon Sep 17 00:00:00 2001 From: Logan Collins Date: Thu, 28 Oct 2021 17:56:54 -0600 Subject: [PATCH 14/14] fmt changes --- library/core/src/str/iter.rs | 6 +++--- 
library/core/src/str/mod.rs | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 0f65d027fee8c..04b9a5e4b0e53 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -648,7 +648,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { None => self.get_end(), } } - + #[inline] fn next_rinclusive(&mut self) -> Option<&'a str> { if self.finished { @@ -760,7 +760,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { }, } } - + #[inline] fn next_back_rinclusive(&mut self) -> Option<&'a str> where @@ -1456,7 +1456,7 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> { /// An iterator over the substrings of a string, /// new substrings beginning when matching to a predicate function. /// Unlike `Split`, it contains the matched part as the start -/// of each subslice - besides the first, which is the contents +/// of each subslice - besides the first, which is the contents /// up until the first match. /// /// This struct is created by the [`split_rinclusive`] method on [`str`]. diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index db95d246717bc..b634beecc9e49 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -1278,8 +1278,7 @@ impl str { finished: false, }) } - - + /// An iterator over substrings of this string slice, separated by /// characters matched by a pattern. Differs from the iterator produced by /// `split` in that `split_rinclusive` leaves the matched part as the