Skip to content

Commit 007aa43

Browse files
committed
Use recursive descent for CharacterClass parsing, fix #4839
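Character class parsing used to rely on a negative lookbehind to find the closing bracket, which failed when the class ended with an escaped backslash (e.g. `[a\\]`): the closing bracket is preceded by a backslash, so the lookbehind wrongly treated it as escaped. Instead of attempting to fix the regular expression by making the lookbehind account for an odd number of preceding backslashes, we now scan for the closing bracket using the same recursive-descent approach that is already used for descending into a group.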
1 parent 62898ee commit 007aa43

File tree

2 files changed

+12
-16
lines changed

2 files changed

+12
-16
lines changed

Rubberduck.RegexAssistant/Atoms/CharacterClass.cs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ namespace Rubberduck.RegexAssistant.Atoms
88
{
99
internal class CharacterClass : IAtom
1010
{
11-
public static readonly string Pattern = @"(?<!\\)\[(?<expression>.*?)(?<!\\)\]";
12-
private static readonly Regex Matcher = new Regex($"^{Pattern}$", RegexOptions.Compiled);
13-
1411
public bool InverseMatching { get; }
1512
public IList<string> CharacterSpecifiers { get; }
1613

@@ -22,13 +19,13 @@ public CharacterClass(string specifier, Quantifier quantifier)
2219
}
2320

2421
Quantifier = quantifier;
25-
var m = Matcher.Match(specifier);
26-
if (!m.Success)
22+
if (!specifier.StartsWith("[") || !specifier.EndsWith("]"))
2723
{
2824
throw new ArgumentException("The given specifier does not denote a character class");
2925
}
3026
Specifier = specifier;
31-
var actualSpecifier = m.Groups["expression"].Value;
27+
// trim leading and closing bracket
28+
var actualSpecifier = specifier.Substring(1, specifier.Length - 2);
3229
InverseMatching = actualSpecifier.StartsWith("^");
3330
CharacterSpecifiers= ExtractCharacterSpecifiers(InverseMatching ? actualSpecifier.Substring(1) : actualSpecifier);
3431
}

Rubberduck.RegexAssistant/VBRegexParser.cs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ namespace Rubberduck.RegexAssistant
1010
internal class VBRegexParser
1111
{
1212
private static readonly Regex LITERAL_PATTERN = new Regex("^" + Literal.Pattern);
13-
private static readonly Regex CHARACTER_CLASS_PATTERN = new Regex("^" + CharacterClass.Pattern);
1413
private static readonly Regex QUANTIFIER_PATTERN = new Regex("^" + Quantifier.Pattern);
1514

1615
public static IRegularExpression Parse(string specifier)
@@ -88,12 +87,7 @@ private static string DescendLiteral(string specifier)
8887

8988
private static string DescendClass(string specifier)
9089
{
91-
var matcher = CHARACTER_CLASS_PATTERN.Match(specifier);
92-
if (matcher.Success)
93-
{
94-
return $"[{matcher.Groups["expression"].Value}]";
95-
}
96-
return "";
90+
return DescendExpression(specifier, '[', ']');
9791
}
9892

9993
private static string GetQuantifier(string specifier, int length)
@@ -108,18 +102,23 @@ private static string GetQuantifier(string specifier, int length)
108102
}
109103

110104
private static string DescendGroup(string specifier)
105+
{
106+
return DescendExpression(specifier, '(', ')');
107+
}
108+
109+
private static string DescendExpression(string specifier, char opening, char closing)
111110
{
112111
int length = 0;
113112
int openingCount = 0;
114113
bool escapeToggle = false;
115-
foreach (var digit in specifier)
114+
foreach (var digit in specifier)
116115
{
117-
if (digit == '(' && !escapeToggle)
116+
if (digit == opening && !escapeToggle)
118117
{
119118
openingCount++;
120119
escapeToggle = false;
121120
}
122-
if (digit == ')' && !escapeToggle)
121+
if (digit == closing && !escapeToggle)
123122
{
124123
openingCount--;
125124
escapeToggle = false;

0 commit comments

Comments
 (0)