Skip to content

Commit f171e92

Browse files
Jules Bertholet (Jules-Bertholet)
authored and committed
Implement split_left_inclusive for string slices
1 parent 13242ef commit f171e92

File tree

3 files changed

+262
-22
lines changed

3 files changed

+262
-22
lines changed

library/alloc/tests/str.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,6 +1438,63 @@ fn test_split_char_iterator_inclusive_rev() {
14381438
assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]);
14391439
}
14401440

1441+
#[test]
fn test_split_char_iterator_left_inclusive() {
    // A haystack made only of separators: every separator opens its own piece.
    let pieces: Vec<&str> = "\n\n\n\n".split_left_inclusive('\n').collect();
    assert_eq!(pieces, ["\n", "\n", "\n", "\n"]);

    // An empty haystack produces no pieces at all.
    let pieces: Vec<&str> = "".split_left_inclusive('\n').collect();
    let nothing: [&str; 0] = [];
    assert_eq!(pieces, nothing);

    // Leading and trailing separators: no empty leading piece, and the
    // trailing separator forms a piece of its own.
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";
    let pieces: Vec<&str> = text.split_left_inclusive('\n').collect();
    assert_eq!(pieces, ["\nMäry häd ä little lämb", "\nLittle lämb", "\n"]);

    // Stateful predicate: an uppercase char matches unless it is the first
    // char inspected or the char inspected just before it matched.
    // (Assumes the searcher feeds chars to the predicate once each, in order.)
    let camel_tail = "SheePSharKTurtlECaT";
    let mut suppress_match = true;
    let pieces: Vec<&str> = camel_tail
        .split_left_inclusive(|c: char| {
            let is_match = !suppress_match && c.is_uppercase();
            suppress_match = is_match;
            is_match
        })
        .collect();
    assert_eq!(pieces, ["Shee", "PShar", "KTurtl", "ECa", "T"]);
}
1466+
1467+
#[test]
fn test_split_char_iterator_left_inclusive_rev() {
    // Same inputs as the forward test, consumed from the back.
    let pieces: Vec<&str> = "\n\n\n\n".split_left_inclusive('\n').rev().collect();
    assert_eq!(pieces, ["\n", "\n", "\n", "\n"]);

    let pieces: Vec<&str> = "".split_left_inclusive('\n').rev().collect();
    let nothing: [&str; 0] = [];
    assert_eq!(pieces, nothing);

    let text = "\nMäry häd ä little lämb\nLittle lämb\n";
    let pieces: Vec<&str> = text.split_left_inclusive('\n').rev().collect();
    assert_eq!(pieces, ["\n", "\nLittle lämb", "\nMäry häd ä little lämb"]);

    // The predicate below carries state, so its result depends on the order
    // in which chars are inspected. Reverse iteration therefore needs a
    // different predicate than forward iteration to get the mirrored pieces;
    // it is unclear whether that can be avoided.
    //
    // Here an uppercase char matches only when the char inspected just before
    // it (or nothing yet) was uppercase as well.
    let camel_tail = "SheePSharKTurtlECaT";
    let mut prev_was_upper = true;
    let pieces: Vec<&str> = camel_tail
        .split_left_inclusive(|c: char| {
            let upper = c.is_uppercase();
            let is_match = prev_was_upper && upper;
            prev_was_upper = upper;
            is_match
        })
        .rev()
        .collect();
    assert_eq!(pieces, ["T", "ECa", "KTurtl", "PShar", "Shee"]);
}
1497+
14411498
#[test]
14421499
fn test_rsplit() {
14431500
let data = "\nMäry häd ä little lämb\nLittle lämb\n";

library/core/src/str/iter.rs

Lines changed: 157 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ pub(super) struct SplitInternal<'a, P: Pattern<'a>> {
563563
pub(super) start: usize,
564564
pub(super) end: usize,
565565
pub(super) matcher: P::Searcher,
566-
pub(super) allow_trailing_empty: bool,
566+
pub(super) allow_bookending_empty: bool,
567567
pub(super) finished: bool,
568568
}
569569

@@ -576,7 +576,7 @@ where
576576
.field("start", &self.start)
577577
.field("end", &self.end)
578578
.field("matcher", &self.matcher)
579-
.field("allow_trailing_empty", &self.allow_trailing_empty)
579+
.field("allow_bookending_empty", &self.allow_bookending_empty)
580580
.field("finished", &self.finished)
581581
.finish()
582582
}
@@ -585,7 +585,7 @@ where
585585
impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
586586
#[inline]
587587
fn get_end(&mut self) -> Option<&'a str> {
588-
if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
588+
if !self.finished && (self.allow_bookending_empty || self.end - self.start > 0) {
589589
self.finished = true;
590590
// SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
591591
unsafe {
@@ -635,6 +635,38 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
635635
}
636636
}
637637

638+
#[inline]
639+
fn next_left_inclusive(&mut self) -> Option<&'a str> {
640+
if self.finished {
641+
return None;
642+
}
643+
644+
if !self.allow_bookending_empty {
645+
self.allow_bookending_empty = true;
646+
match self.next_left_inclusive() {
647+
Some(elt) if !elt.is_empty() => return Some(elt),
648+
_ => {
649+
if self.finished {
650+
return None;
651+
}
652+
}
653+
}
654+
}
655+
656+
let haystack = self.matcher.haystack();
657+
match self.matcher.next_match() {
658+
// SAFETY: `Searcher` guarantees that `b` lies on unicode boundary,
659+
// and self.start is either the start of the original string,
660+
// or `b` was assigned to it, so it also lies on unicode boundary.
661+
Some((b, _)) => unsafe {
662+
let elt = haystack.get_unchecked(self.start..b);
663+
self.start = b;
664+
Some(elt)
665+
},
666+
None => self.get_end(),
667+
}
668+
}
669+
638670
#[inline]
639671
fn next_back(&mut self) -> Option<&'a str>
640672
where
@@ -644,8 +676,8 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
644676
return None;
645677
}
646678

647-
if !self.allow_trailing_empty {
648-
self.allow_trailing_empty = true;
679+
if !self.allow_bookending_empty {
680+
self.allow_bookending_empty = true;
649681
match self.next_back() {
650682
Some(elt) if !elt.is_empty() => return Some(elt),
651683
_ => {
@@ -681,8 +713,8 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
681713
return None;
682714
}
683715

684-
if !self.allow_trailing_empty {
685-
self.allow_trailing_empty = true;
716+
if !self.allow_bookending_empty {
717+
self.allow_bookending_empty = true;
686718
match self.next_back_inclusive() {
687719
Some(elt) if !elt.is_empty() => return Some(elt),
688720
_ => {
@@ -715,6 +747,40 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
715747
}
716748
}
717749

750+
#[inline]
751+
fn next_back_left_inclusive(&mut self) -> Option<&'a str>
752+
where
753+
P::Searcher: ReverseSearcher<'a>,
754+
{
755+
if self.finished {
756+
return None;
757+
}
758+
759+
let haystack = self.matcher.haystack();
760+
match self.matcher.next_match_back() {
761+
// SAFETY: `Searcher` guarantees that `b` lies on unicode boundary,
762+
// and self.end is either the end of the original string,
763+
// or `b` was assigned to it, so it also lies on unicode boundary.
764+
Some((b, _)) => unsafe {
765+
let elt = haystack.get_unchecked(b..self.end);
766+
self.end = b;
767+
if self.start == b {
768+
self.finished = true;
769+
}
770+
Some(elt)
771+
},
772+
// SAFETY: self.start is either the start of the original string,
773+
// or start of a substring that represents the part of the string that hasn't
774+
// iterated yet. Either way, it is guaranteed to lie on unicode boundary.
775+
// self.end is either the end of the original string,
776+
// or `b` was assigned to it, so it also lies on unicode boundary.
777+
None => unsafe {
778+
self.finished = true;
779+
Some(haystack.get_unchecked(self.start..self.end))
780+
},
781+
}
782+
}
783+
718784
#[inline]
719785
fn as_str(&self) -> &'a str {
720786
// `Self::get_end` doesn't change `self.start`
@@ -1190,18 +1256,6 @@ pub struct SplitAsciiWhitespace<'a> {
11901256
Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>,
11911257
}
11921258

1193-
/// An iterator over the substrings of a string,
1194-
/// terminated by a substring matching to a predicate function
1195-
/// Unlike `Split`, it contains the matched part as a terminator
1196-
/// of the subslice.
1197-
///
1198-
/// This struct is created by the [`split_inclusive`] method on [`str`].
1199-
/// See its documentation for more.
1200-
///
1201-
/// [`split_inclusive`]: str::split_inclusive
1202-
#[stable(feature = "split_inclusive", since = "1.51.0")]
1203-
pub struct SplitInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>);
1204-
12051259
#[stable(feature = "split_whitespace", since = "1.1.0")]
12061260
impl<'a> Iterator for SplitWhitespace<'a> {
12071261
type Item = &'a str;
@@ -1319,6 +1373,18 @@ impl<'a> SplitAsciiWhitespace<'a> {
13191373
}
13201374
}
13211375

1376+
/// An iterator over the substrings of a string, each
1377+
/// terminated by a substring matching a pattern.
1378+
/// Unlike `Split`, it keeps the matched part as the terminator
1379+
/// of the subslice.
1380+
///
1381+
/// This struct is created by the [`split_inclusive`] method on [`str`].
1382+
/// See its documentation for more.
1383+
///
1384+
/// [`split_inclusive`]: str::split_inclusive
1385+
#[stable(feature = "split_inclusive", since = "1.51.0")]
1386+
pub struct SplitInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>);
1387+
13221388
#[stable(feature = "split_inclusive", since = "1.51.0")]
13231389
impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> {
13241390
type Item = &'a str;
@@ -1378,6 +1444,78 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> {
13781444
}
13791445
}
13801446

1447+
/// An iterator over the substrings of a string, each (except possibly
1448+
/// the first) starting at a substring matching a pattern.
1449+
/// Unlike `Split`, it keeps the matched part as the initiator
1450+
/// of the subslice.
1451+
///
1452+
/// This struct is created by the [`split_left_inclusive`] method on [`str`].
1453+
/// See its documentation for more.
1454+
///
1455+
/// [`split_left_inclusive`]: str::split_left_inclusive
1456+
#[unstable(feature = "split_left_inclusive", issue = "none")]
1457+
pub struct SplitLeftInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>);
1458+
1459+
#[unstable(feature = "split_left_inclusive", issue = "none")]
1460+
impl<'a, P: Pattern<'a>> Iterator for SplitLeftInclusive<'a, P> {
1461+
type Item = &'a str;
1462+
1463+
#[inline]
1464+
fn next(&mut self) -> Option<&'a str> {
1465+
self.0.next_left_inclusive()
1466+
}
1467+
}
1468+
1469+
#[unstable(feature = "split_left_inclusive", issue = "none")]
1470+
impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitLeftInclusive<'a, P> {
1471+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1472+
f.debug_struct("SplitLeftInclusive").field("0", &self.0).finish()
1473+
}
1474+
}
1475+
1476+
// FIXME(#26925) Remove in favor of `#[derive(Clone)]`
1477+
#[unstable(feature = "split_left_inclusive", issue = "none")]
1478+
impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitLeftInclusive<'a, P> {
1479+
fn clone(&self) -> Self {
1480+
SplitLeftInclusive(self.0.clone())
1481+
}
1482+
}
1483+
1484+
#[unstable(feature = "split_left_inclusive", issue = "none")]
1485+
impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
1486+
for SplitLeftInclusive<'a, P>
1487+
{
1488+
#[inline]
1489+
fn next_back(&mut self) -> Option<&'a str> {
1490+
self.0.next_back_left_inclusive()
1491+
}
1492+
}
1493+
1494+
// Marker impl with no methods. `next` forwards to
// `SplitInternal::next_left_inclusive`, which returns `None` whenever
// `finished` is set.
// NOTE(review): fusedness relies on `finished` never being reset — confirm.
#[unstable(feature = "split_left_inclusive", issue = "none")]
1495+
impl<'a, P: Pattern<'a>> FusedIterator for SplitLeftInclusive<'a, P> {}
1496+
1497+
impl<'a, P: Pattern<'a>> SplitLeftInclusive<'a, P> {
1498+
/// Returns remainder of the splitted string
1499+
///
1500+
/// # Examples
1501+
///
1502+
/// ```
1503+
/// #![feature(str_split_inclusive_as_str)]
1504+
/// #![feature(split_left_inclusive)]
1505+
/// let mut split = "Mary had a little lamb".split_left_inclusive(' ');
1506+
/// assert_eq!(split.as_str(), "Mary had a little lamb");
1507+
/// split.next();
1508+
/// assert_eq!(split.as_str(), " had a little lamb");
1509+
/// split.by_ref().for_each(drop);
1510+
/// assert_eq!(split.as_str(), "");
1511+
/// ```
1512+
#[inline]
1513+
#[unstable(feature = "str_split_inclusive_as_str", issue = "77998")]
1514+
pub fn as_str(&self) -> &'a str {
1515+
self.0.as_str()
1516+
}
1517+
}
1518+
13811519
/// An iterator of [`u16`] over the string encoded as UTF-16.
13821520
///
13831521
/// This struct is created by the [`encode_utf16`] method on [`str`].

library/core/src/str/mod.rs

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ pub use iter::SplitAsciiWhitespace;
6969
#[stable(feature = "split_inclusive", since = "1.51.0")]
7070
pub use iter::SplitInclusive;
7171

72+
#[unstable(feature = "split_left_inclusive", issue = "none")]
73+
pub use iter::SplitLeftInclusive;
74+
7275
#[unstable(feature = "str_internals", issue = "none")]
7376
pub use validations::{next_code_point, utf8_char_width};
7477

@@ -1327,7 +1330,7 @@ impl str {
13271330
start: 0,
13281331
end: self.len(),
13291332
matcher: pat.into_searcher(self),
1330-
allow_trailing_empty: true,
1333+
allow_bookending_empty: true,
13311334
finished: false,
13321335
})
13331336
}
@@ -1367,11 +1370,53 @@ impl str {
13671370
start: 0,
13681371
end: self.len(),
13691372
matcher: pat.into_searcher(self),
1370-
allow_trailing_empty: false,
1373+
allow_bookending_empty: false,
13711374
finished: false,
13721375
})
13731376
}
13741377

1378+
/// An iterator over substrings of this string slice, separated by
1379+
/// characters matched by a pattern. Differs from the iterator produced by
1380+
/// `split` in that `split_left_inclusive` leaves the matched part as the
1381+
/// initiator of the substring.
1382+
///
1383+
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1384+
/// function or closure that determines if a character matches.
1385+
///
1386+
/// [`char`]: prim@char
1387+
/// [pattern]: self::pattern
1388+
///
1389+
/// # Examples
1390+
///
1391+
/// ```
1392+
/// #![feature(split_left_inclusive)]
1393+
/// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
1394+
/// .split_left_inclusive('\n').collect();
1395+
/// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb."]);
1396+
/// ```
1397+
///
1398+
/// If the last element of the string is matched,
1399+
/// that element will be considered the initiator of a new substring.
1400+
/// That substring will be the last item returned by the iterator.
1401+
///
1402+
/// ```
1403+
/// #![feature(split_left_inclusive)]
1404+
/// let v: Vec<&str> = "\nMary had a little lamb\nlittle lamb\nlittle lamb.\n"
1405+
/// .split_left_inclusive('\n').collect();
1406+
/// assert_eq!(v, ["\nMary had a little lamb", "\nlittle lamb", "\nlittle lamb.", "\n"]);
1407+
/// ```
1408+
#[unstable(feature = "split_left_inclusive", issue = "none")]
1409+
#[inline]
1410+
pub fn split_left_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitLeftInclusive<'a, P> {
1411+
SplitLeftInclusive(SplitInternal {
1412+
start: 0,
1413+
end: self.len(),
1414+
matcher: pat.into_searcher(self),
1415+
allow_bookending_empty: false,
1416+
finished: self.is_empty(),
1417+
})
1418+
}
1419+
13751420
/// An iterator over substrings of the given string slice, separated by
13761421
/// characters matched by a pattern and yielded in reverse order.
13771422
///
@@ -1469,7 +1514,7 @@ impl str {
14691514
#[stable(feature = "rust1", since = "1.0.0")]
14701515
#[inline]
14711516
pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1472-
SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
1517+
SplitTerminator(SplitInternal { allow_bookending_empty: false, ..self.split(pat).0 })
14731518
}
14741519

14751520
/// An iterator over substrings of `self`, separated by characters

0 commit comments

Comments
 (0)