Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using Xunit;

namespace System.Text.RegularExpressions.Tests
{
/// <summary>
/// These tests have been ported from the re2 test suite located at https://github.com/google/re2/tree/61c4644171ee6b480540bf9e569cba06d9090b4b/re2/testing
/// in order to increase .NET's test coverage. You can find the relevant repo license in this folder's THIRD-PARTY-NOTICES.TXT file.
/// </summary>
/// <remarks>
/// Every case only asserts whether the pattern matches somewhere in the input (<see cref="Regex.IsMatch(string)"/>);
/// match positions and capture values are not verified here.
/// </remarks>
public class RegexRe2Tests
{
    /// <summary>
    /// Produces the theory rows for <see cref="IsMatchTests"/>: for every available engine, one row per
    /// case consisting of the constructed <see cref="Regex"/>, the input and the expected result.
    /// </summary>
    /// <returns>Rows shaped as <c>{ Regex regex, string input, bool expectSuccess }</c>.</returns>
    public static IEnumerable<object[]> Re2TestData()
    {
        foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
        {
            // Materialize the cases once so that regexes[i] and cases[i] refer to the same case.
            (string pattern, RegexOptions options, string input, bool expectedSuccess)[] cases = Re2TestData_Cases(engine).ToArray();

            // All regexes for an engine are requested in a single call; culture and timeout are left unspecified (null).
            Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.pattern, (CultureInfo?)null, (RegexOptions?)c.options, (TimeSpan?)null)).ToArray());
            for (int i = 0; i < regexes.Length; i++)
            {
                yield return new object[] { regexes[i], cases[i].input, cases[i].expectedSuccess };
            }
        }
    }

    /// <summary>
    /// Enumerates the raw re2-derived cases for the given engine.
    /// </summary>
    /// <param name="engine">
    /// The engine the cases are generated for. Backreference cases are omitted when
    /// <c>RegexHelpers.IsNonBacktracking(engine)</c> is true.
    /// </param>
    /// <returns>
    /// Tuples of pattern, options, input and whether <see cref="Regex.IsMatch(string)"/> is expected to succeed.
    /// </returns>
    public static IEnumerable<(string Pattern, RegexOptions options, string Input, bool ExpectedSuccess)> Re2TestData_Cases(RegexEngine engine)
    {
        // Skip backreferences for NonBacktracking engine
        bool skipBackreferences = RegexHelpers.IsNonBacktracking(engine);

        // Basic matching tests from search_test.cc and re2_test.cc
        yield return ("a", RegexOptions.None, "a", true);
        yield return ("a", RegexOptions.None, "zyzzyva", true);
        yield return ("a+", RegexOptions.None, "aa", true);
        yield return ("(a+|b)+", RegexOptions.None, "ab", true);
        yield return ("ab|cd", RegexOptions.None, "xabcdx", true);
        yield return ("h.*o", RegexOptions.None, "hello", true);
        yield return ("h.*o", RegexOptions.None, "hello world", true);
        yield return ("h.*o", RegexOptions.None, "othello, world", true);
        yield return ("[^\\s\\S]", RegexOptions.None, "aaaaaaa", false);
        yield return ("a", RegexOptions.None, "aaaaaaa", true);
        yield return ("a*", RegexOptions.None, "aaaaaaa", true);
        yield return ("a*", RegexOptions.None, "", true);
        yield return ("a", RegexOptions.None, "cab", true);
        yield return ("a*b", RegexOptions.None, "cab", true);
        yield return ("((((((((((((((((((((x))))))))))))))))))))", RegexOptions.None, "x", true);
        yield return ("[abcd]", RegexOptions.None, "xxxabcdxxx", true);
        yield return ("[^x]", RegexOptions.None, "xxxabcdxxx", true);
        yield return ("[abcd]+", RegexOptions.None, "xxxabcdxxx", true);
        yield return ("[^x]+", RegexOptions.None, "xxxabcdxxx", true);
        yield return ("(fo|foo)", RegexOptions.None, "fo", true);
        yield return ("(foo|fo)", RegexOptions.None, "foo", true);

        // Anchor tests
        yield return ("^foo", RegexOptions.None, "foo", true);
        yield return ("^foo", RegexOptions.None, "foobar", true);
        yield return ("^foo", RegexOptions.None, "barfoo", false);
        yield return ("foo$", RegexOptions.None, "foo", true);
        yield return ("foo$", RegexOptions.None, "barfoo", true);
        yield return ("foo$", RegexOptions.None, "foobar", false);
        yield return ("^foo$", RegexOptions.None, "foo", true);
        yield return ("^foo$", RegexOptions.None, "foobar", false);
        yield return ("^foo$", RegexOptions.None, "barfoo", false);
        yield return ("^$", RegexOptions.None, "", true);
        yield return ("^$", RegexOptions.None, "x", false);
        yield return ("^", RegexOptions.None, "x", true);
        yield return ("$", RegexOptions.None, "x", true);

        // Word boundaries
        yield return ("\\bfoo\\b", RegexOptions.None, "nofoo foo that", true);
        yield return ("\\bfoo\\b", RegexOptions.None, "nofoo foothat", false);
        yield return ("a\\b", RegexOptions.None, "faoa x", true);
        yield return ("\\bbar", RegexOptions.None, "bar x", true);
        yield return ("\\bbar", RegexOptions.None, "foo\nbar x", true);
        yield return ("bar\\b", RegexOptions.None, "foobar", true);
        yield return ("bar\\b", RegexOptions.None, "foobar\nxxx", true);
        yield return ("\\b", RegexOptions.None, "x", true);
        yield return ("\\b(foo|bar|[A-Z])\\b", RegexOptions.None, "X", true);
        yield return ("\\b(foo|bar|[A-Z])\\b", RegexOptions.None, "bar", true);
        yield return ("\\b(foo|bar|[A-Z])\\b", RegexOptions.None, "foo", true);
        yield return ("\\b(fo|foo)\\b", RegexOptions.None, "fo", true);
        yield return ("\\b(fo|foo)\\b", RegexOptions.None, "foo", true);

        // Non-word boundaries
        yield return ("\\Bfoo\\B", RegexOptions.None, "n foo xfoox that", true);
        yield return ("a\\B", RegexOptions.None, "faoa x", true);
        yield return ("\\Bbar", RegexOptions.None, "bar x", false);
        yield return ("bar\\B", RegexOptions.None, "foobar", false);
        yield return ("(foo|bar|[A-Z])\\B", RegexOptions.None, "foox", true);
        yield return ("\\B(foo|bar|[A-Z])\\B", RegexOptions.None, "xXy", true);
        yield return ("\\B(foo|bar|[A-Z])\\B", RegexOptions.None, "abara", true);
        yield return ("\\B(fo|foo)\\B", RegexOptions.None, "xfoo", true);
        yield return ("\\B(foo|fo)\\B", RegexOptions.None, "xfooo", true);

        // Word boundary with special characters
        yield return ("\\bx\\b", RegexOptions.None, "x", true);
        yield return ("\\bx\\b", RegexOptions.None, "x>", true);
        yield return ("\\bx\\b", RegexOptions.None, "<x", true);
        yield return ("\\bx\\b", RegexOptions.None, "<x>", true);
        yield return ("\\bx\\b", RegexOptions.None, "ax", false);
        yield return ("\\bx\\b", RegexOptions.None, "xb", false);
        yield return ("\\bx\\b", RegexOptions.None, "axb", false);
        yield return ("\\bx\\b", RegexOptions.None, "«x", true);
        yield return ("\\bx\\b", RegexOptions.None, "x»", true);
        yield return ("\\bx\\b", RegexOptions.None, "«x»", true);
        // Note: .NET treats Unicode letters as word characters, unlike RE2/PCRE which only use ASCII.
        // So \bx\b doesn't match "áxβ" (U+00E1 'x' U+03B2) in .NET: there is no boundary on either side of the 'x'.
        yield return ("\\bx\\b", RegexOptions.None, "\u00e1x\u03b2", false);
        yield return ("\\Bx\\B", RegexOptions.None, "axb", true);
        yield return ("\\Bx\\B", RegexOptions.None, "\u00e1x\u03b2", true);

        // Non-ASCII input (the "UTF-8" tests in RE2; each character below is matched by a single '.')
        yield return ("^\u672c$", RegexOptions.None, "\u672c", true);
        yield return ("^...$", RegexOptions.None, "\u65e5\u672c\u8a9e", true);
        yield return ("^...$", RegexOptions.None, ".\u672c.", true);

        // Octal escapes
        yield return ("\\141", RegexOptions.None, "a", true);
        yield return ("\\060", RegexOptions.None, "0", true);
        yield return ("\\01", RegexOptions.None, "\u0001", true);

        // Hexadecimal escapes
        yield return ("\\x61", RegexOptions.None, "a", true);
        yield return ("\\u0061", RegexOptions.None, "a", true);

        // Character classes & case folding
        yield return ("(?i)[@-A]+", RegexOptions.None, "@AaB", true);
        yield return ("(?i)[A-Z]+", RegexOptions.None, "aAzZ", true);
        yield return ("(?i)[^\\\\]+", RegexOptions.None, "Aa\\", true);
        yield return ("(?i)[acegikmoqsuwy]+", RegexOptions.None, "acegikmoqsuwyACEGIKMOQSUWY", true);

        // Without case folding
        yield return ("[@-A]+", RegexOptions.None, "@AaB", true);
        yield return ("[A-Z]+", RegexOptions.None, "aAzZ", true);
        yield return ("[^\\\\]+", RegexOptions.None, "Aa\\", true);
        yield return ("[acegikmoqsuwy]+", RegexOptions.None, "acegikmoqsuwyACEGIKMOQSUWY", true);

        // Anchoring tests
        yield return ("^abc", RegexOptions.None, "abcdef", true);
        yield return ("^abc", RegexOptions.None, "aabcdef", false);
        yield return ("def$", RegexOptions.None, "abcdef", true);
        yield return ("def$", RegexOptions.None, "abcdeff", false);

        // Multiline mode
        yield return ("(?m)^foo", RegexOptions.None, "bar\nfoo", true);
        yield return ("(?m)^foo", RegexOptions.None, "barfoo", false);
        yield return ("(?m)bar$", RegexOptions.None, "bar\nfoo", true);
        yield return ("(?m)bar$", RegexOptions.None, "barfoo", false);

        // Context tests
        yield return ("a", RegexOptions.None, "a", true);
        yield return ("ab*", RegexOptions.None, "a", true);

        // Former bugs
        yield return ("\\w*I\\w*", RegexOptions.None, "Inc.", true);
        yield return ("(?:|a)*", RegexOptions.None, "aaa", true);
        yield return ("(?:|a)+", RegexOptions.None, "aaa", true);

        // FullMatch tests from re2_test.cc
        yield return ("h", RegexOptions.None, "h", true);
        yield return ("hello", RegexOptions.None, "hello", true);
        yield return ("h.*o", RegexOptions.None, "hello", true);

        // PartialMatch tests
        yield return ("x", RegexOptions.None, "x", true);
        yield return ("h.*o", RegexOptions.None, "hello", true);
        yield return ("h.*o", RegexOptions.None, "othello", true);
        yield return ("h.*o", RegexOptions.None, "hello!", true);

        // Braces
        yield return ("[0-9a-f+.-]{5,}", RegexOptions.None, "0abcd", true);
        yield return ("[0-9a-f+.-]{5,}", RegexOptions.None, "0abcde", true);
        yield return ("[0-9a-f+.-]{5,}", RegexOptions.None, "0abc", false);

        // Complicated RE
        yield return ("foo|bar|[A-Z]", RegexOptions.None, "foo", true);
        yield return ("foo|bar|[A-Z]", RegexOptions.None, "bar", true);
        yield return ("foo|bar|[A-Z]", RegexOptions.None, "X", true);
        yield return ("foo|bar|[A-Z]", RegexOptions.None, "XY", true);

        // Check full-match handling
        yield return ("fo|foo", RegexOptions.None, "fo", true);
        yield return ("fo|foo", RegexOptions.None, "foo", true);
        yield return ("foo$", RegexOptions.None, "foo", true);

        // Non-ASCII handling: '.' matches a single non-ASCII character
        yield return (".", RegexOptions.None, "\u65e5", true);

        // Case insensitive
        yield return ("(?i)HELLO", RegexOptions.None, "hello", true);
        yield return ("(?i)hello", RegexOptions.None, "HELLO", true);
        yield return ("(?i)[a-z]+", RegexOptions.None, "AbCdE", true);

        // Perl operators that work
        yield return ("(?:foo)", RegexOptions.None, "foo", true);

        // Backreferences - skip for NonBacktracking engine
        if (!skipBackreferences)
        {
            yield return ("(foo)\\1", RegexOptions.None, "foofoo", true);
            yield return ("(foo)\\1", RegexOptions.None, "foobar", false);
        }

        // Quantifiers (IsMatch is unanchored, so an input longer than the quantifier's upper bound still matches)
        yield return ("a?", RegexOptions.None, "", true);
        yield return ("a?", RegexOptions.None, "a", true);
        yield return ("a?", RegexOptions.None, "aa", true);
        yield return ("a+", RegexOptions.None, "", false);
        yield return ("a+", RegexOptions.None, "a", true);
        yield return ("a+", RegexOptions.None, "aa", true);
        yield return ("a*", RegexOptions.None, "", true);
        yield return ("a*", RegexOptions.None, "a", true);
        yield return ("a*", RegexOptions.None, "aa", true);
        yield return ("a{2}", RegexOptions.None, "a", false);
        yield return ("a{2}", RegexOptions.None, "aa", true);
        yield return ("a{2}", RegexOptions.None, "aaa", true);
        yield return ("a{2,}", RegexOptions.None, "a", false);
        yield return ("a{2,}", RegexOptions.None, "aa", true);
        yield return ("a{2,}", RegexOptions.None, "aaa", true);
        yield return ("a{2,4}", RegexOptions.None, "a", false);
        yield return ("a{2,4}", RegexOptions.None, "aa", true);
        yield return ("a{2,4}", RegexOptions.None, "aaa", true);
        yield return ("a{2,4}", RegexOptions.None, "aaaa", true);
        yield return ("a{2,4}", RegexOptions.None, "aaaaa", true);
    }

    /// <summary>
    /// Verifies that <see cref="Regex.IsMatch(string)"/> returns the expected result for a single case.
    /// </summary>
    /// <param name="regex">The regex built for the case's pattern and options.</param>
    /// <param name="input">The text to search.</param>
    /// <param name="expectSuccess">Whether a match is expected anywhere in <paramref name="input"/>.</param>
    [Theory]
    [MemberData(nameof(Re2TestData))]
    public void IsMatchTests(Regex regex, string input, bool expectSuccess)
        => Assert.Equal(expectSuccess, regex.IsMatch(input));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
<Compile Include="RegexMatchTimeoutExceptionTests.cs" />
<Compile Include="RegexParserTests.cs" />
<Compile Include="RegexPcreTests.cs" />
<Compile Include="RegexRe2Tests.cs" />
<Compile Include="RegexRustTests.cs" />
</ItemGroup>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,37 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

License notice for https://github.com/google/re2
-------------------------------------------------

Copyright (c) 2009 The RE2 Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

License notice for https://github.com/PCRE2Project/pcre2
--------------------------------------------------------

Expand Down
Loading