Skip to content

Commit 7bf56f5

Browse files
authored
Merge pull request #4885 from Vogel612/fix/regex-character-class-recognition
Fixes regex character class recognition in regex assistant tool
2 parents c8c6203 + 007aa43 commit 7bf56f5

File tree

6 files changed, with 80 additions and 60 deletions.

Rubberduck.RegexAssistant/Atoms/CharacterClass.cs

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,6 @@ namespace Rubberduck.RegexAssistant.Atoms
88
{
99
internal class CharacterClass : IAtom
1010
{
11-
public static readonly string Pattern = @"(?<!\\)\[(?<expression>.*?)(?<!\\)\]";
12-
private static readonly Regex Matcher = new Regex($"^{Pattern}$", RegexOptions.Compiled);
13-
1411
public bool InverseMatching { get; }
1512
public IList<string> CharacterSpecifiers { get; }
1613

@@ -22,13 +19,13 @@ public CharacterClass(string specifier, Quantifier quantifier)
2219
}
2320

2421
Quantifier = quantifier;
25-
var m = Matcher.Match(specifier);
26-
if (!m.Success)
22+
if (!specifier.StartsWith("[") || !specifier.EndsWith("]"))
2723
{
2824
throw new ArgumentException("The given specifier does not denote a character class");
2925
}
3026
Specifier = specifier;
31-
var actualSpecifier = m.Groups["expression"].Value;
27+
// trim leading and closing bracket
28+
var actualSpecifier = specifier.Substring(1, specifier.Length - 2);
3229
InverseMatching = actualSpecifier.StartsWith("^");
3330
CharacterSpecifiers= ExtractCharacterSpecifiers(InverseMatching ? actualSpecifier.Substring(1) : actualSpecifier);
3431
}

Rubberduck.RegexAssistant/VBRegexParser.cs

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ namespace Rubberduck.RegexAssistant
1010
internal class VBRegexParser
1111
{
1212
private static readonly Regex LITERAL_PATTERN = new Regex("^" + Literal.Pattern);
13-
private static readonly Regex CHARACTER_CLASS_PATTERN = new Regex("^" + CharacterClass.Pattern);
1413
private static readonly Regex QUANTIFIER_PATTERN = new Regex("^" + Quantifier.Pattern);
1514

1615
public static IRegularExpression Parse(string specifier)
@@ -88,12 +87,7 @@ private static string DescendLiteral(string specifier)
8887

8988
private static string DescendClass(string specifier)
9089
{
91-
var matcher = CHARACTER_CLASS_PATTERN.Match(specifier);
92-
if (matcher.Success)
93-
{
94-
return $"[{matcher.Groups["expression"].Value}]";
95-
}
96-
return "";
90+
return DescendExpression(specifier, '[', ']');
9791
}
9892

9993
private static string GetQuantifier(string specifier, int length)
@@ -108,18 +102,23 @@ private static string GetQuantifier(string specifier, int length)
108102
}
109103

110104
private static string DescendGroup(string specifier)
105+
{
106+
return DescendExpression(specifier, '(', ')');
107+
}
108+
109+
private static string DescendExpression(string specifier, char opening, char closing)
111110
{
112111
int length = 0;
113112
int openingCount = 0;
114113
bool escapeToggle = false;
115-
foreach (var digit in specifier)
114+
foreach (var digit in specifier)
116115
{
117-
if (digit == '(' && !escapeToggle)
116+
if (digit == opening && !escapeToggle)
118117
{
119118
openingCount++;
120119
escapeToggle = false;
121120
}
122-
if (digit == ')' && !escapeToggle)
121+
if (digit == closing && !escapeToggle)
123122
{
124123
openingCount--;
125124
escapeToggle = false;

RubberduckTests/RegexAssistant/CharacterClassTests.cs

Lines changed: 33 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,15 @@
22
using System.Collections.Generic;
33
using NUnit.Framework;
44
using Rubberduck.RegexAssistant.Atoms;
5+
using Rubberduck.RegexAssistant.Expressions;
56

67
namespace Rubberduck.RegexAssistant.Tests
78
{
9+
810
[TestFixture]
11+
[Category("RegexAssistant")]
912
public class CharacterClassTests
1013
{
11-
[Category("RegexAssistant")]
1214
[Test]
1315
public void InvertedCharacterClass()
1416
{
@@ -23,7 +25,7 @@ public void InvertedCharacterClass()
2325
}
2426
}
2527

26-
[Category("RegexAssistant")]
28+
2729
[Test]
2830
public void SimpleCharacterRange()
2931
{
@@ -38,7 +40,7 @@ public void SimpleCharacterRange()
3840
}
3941
}
4042

41-
[Category("RegexAssistant")]
43+
4244
[Test]
4345
public void UnicodeCharacterRange()
4446
{
@@ -53,7 +55,7 @@ public void UnicodeCharacterRange()
5355
}
5456
}
5557

56-
[Category("RegexAssistant")]
58+
5759
[Test]
5860
public void OctalCharacterRange()
5961
{
@@ -68,7 +70,7 @@ public void OctalCharacterRange()
6870
}
6971
}
7072

71-
[Category("RegexAssistant")]
73+
7274
[Test]
7375
public void HexadecimalCharacterRange()
7476
{
@@ -83,7 +85,7 @@ public void HexadecimalCharacterRange()
8385
}
8486
}
8587

86-
[Category("RegexAssistant")]
88+
8789
[Test]
8890
public void MixedCharacterRanges()
8991
{
@@ -102,7 +104,7 @@ public void MixedCharacterRanges()
102104
}
103105
}
104106

105-
[Category("RegexAssistant")]
107+
106108
[Test]
107109
public void RangeFailureWithCharacterClass()
108110
{
@@ -124,7 +126,7 @@ public void RangeFailureWithCharacterClass()
124126
}
125127
}
126128

127-
[Category("RegexAssistant")]
129+
128130
[Test]
129131
public void EscapedLiteralRanges()
130132
{
@@ -153,7 +155,7 @@ public void EscapedLiteralRanges()
153155
}
154156
}
155157

156-
[Category("RegexAssistant")]
158+
157159
[Test]
158160
public void SkipsIncorrectlyEscapedLiterals()
159161
{
@@ -173,7 +175,7 @@ public void SkipsIncorrectlyEscapedLiterals()
173175
}
174176
}
175177

176-
[Category("RegexAssistant")]
178+
177179
[Test]
178180
public void IncorrectlyEscapedRangeTargetLiteralsBlowUp()
179181
{
@@ -194,7 +196,7 @@ public void IncorrectlyEscapedRangeTargetLiteralsBlowUp()
194196

195197
}
196198

197-
[Category("RegexAssistant")]
199+
198200
[Test]
199201
public void IgnoresBackreferenceSpecifiers()
200202
{
@@ -208,5 +210,25 @@ public void IgnoresBackreferenceSpecifiers()
208210
Assert.AreEqual(expectedSpecifiers[i], cut.CharacterSpecifiers[i]);
209211
}
210212
}
213+
214+
[Test]
215+
// https://github.com/rubberduck-vba/Rubberduck/issues/4839
216+
public void TrailingEscapedBackslash()
217+
{
218+
const string pattern = @"[^\w\\]";
219+
220+
var expression = VBRegexParser.Parse(pattern);
221+
Assert.IsInstanceOf(typeof(SingleAtomExpression), expression);
222+
var atom = (expression as SingleAtomExpression).Atom;
223+
Assert.AreEqual(new CharacterClass(@"[^\w\\]", Quantifier.None), atom);
224+
}
225+
226+
[Test]
227+
// https://github.com/rubberduck-vba/Rubberduck/issues/4839
228+
public void TrailingEscapedBackslashIsConstructible()
229+
{
230+
const string pattern = @"[^\w\\]";
231+
new CharacterClass(pattern, Quantifier.None);
232+
}
211233
}
212234
}

RubberduckTests/RegexAssistant/LiteralTests.cs

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,10 @@
66
namespace Rubberduck.RegexAssistant.Tests
77
{
88
[TestFixture]
9+
[Category("RegexAssistant")]
910
public class LiteralTests
1011
{
11-
[Category("RegexAssistant")]
12+
1213
[Test]
1314
public void EscapedLiteralTests()
1415
{
@@ -20,7 +21,7 @@ public void EscapedLiteralTests()
2021
}
2122
}
2223

23-
[Category("RegexAssistant")]
24+
2425
[Test]
2526
public void EscapeSequences()
2627
{
@@ -32,7 +33,7 @@ public void EscapeSequences()
3233
}
3334
}
3435

35-
[Category("RegexAssistant")]
36+
3637
[Test]
3738
public void CodePoints()
3839
{
@@ -44,7 +45,7 @@ public void CodePoints()
4445
}
4546
}
4647

47-
[Category("RegexAssistant")]
48+
4849
[Test]
4950
public void SimpleLiterals()
5051
{
@@ -56,7 +57,7 @@ public void SimpleLiterals()
5657
}
5758
}
5859

59-
[Category("RegexAssistant")]
60+
6061
[Test]
6162
public void EverythingElseBlowsUp()
6263
{
@@ -79,7 +80,7 @@ public void EverythingElseBlowsUp()
7980
}
8081
}
8182

82-
[Category("RegexAssistant")]
83+
8384
[Test]
8485
public void SingleEscapedCharsAreNotParsedAsLiteral()
8586
{

RubberduckTests/RegexAssistant/QuantifierTests.cs

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
namespace Rubberduck.RegexAssistant.Tests
44
{
55
[TestFixture]
6+
[Category("RegexAssistant")]
67
public class QuantifierTests
78
{
8-
[Category("RegexAssistant")]
9+
910
[Test]
1011
public void AsteriskQuantifier()
1112
{
@@ -15,7 +16,7 @@ public void AsteriskQuantifier()
1516
Assert.AreEqual(QuantifierKind.Wildcard, cut.Kind);
1617
}
1718

18-
[Category("RegexAssistant")]
19+
1920
[Test]
2021
public void QuestionMarkQuantifier()
2122
{
@@ -25,7 +26,7 @@ public void QuestionMarkQuantifier()
2526
Assert.AreEqual(QuantifierKind.Wildcard, cut.Kind);
2627
}
2728

28-
[Category("RegexAssistant")]
29+
2930
[Test]
3031
public void PlusQuantifier()
3132
{
@@ -35,7 +36,7 @@ public void PlusQuantifier()
3536
Assert.AreEqual(QuantifierKind.Wildcard, cut.Kind);
3637
}
3738

38-
[Category("RegexAssistant")]
39+
3940
[Test]
4041
public void ExactQuantifier()
4142
{
@@ -45,7 +46,7 @@ public void ExactQuantifier()
4546
Assert.AreEqual(QuantifierKind.Expression, cut.Kind);
4647
}
4748

48-
[Category("RegexAssistant")]
49+
4950
[Test]
5051
public void FullRangeQuantifier()
5152
{
@@ -55,7 +56,7 @@ public void FullRangeQuantifier()
5556
Assert.AreEqual(QuantifierKind.Expression, cut.Kind);
5657
}
5758

58-
[Category("RegexAssistant")]
59+
5960
[Test]
6061
public void OpenRangeQuantifier()
6162
{

0 commit comments

Comments
 (0)