-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Propagate regex comments to source-generated code #120629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
c9267fa
b241d7e
e6a2bd2
1c76030
e512471
56bba80
b5e1e50
fad8a23
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Runtime.CompilerServices; | ||
|
||
[assembly: InternalsVisibleTo("System.Text.RegularExpressions.Tests")] | ||
stephentoub marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -52,7 +52,11 @@ internal ref struct RegexParser | |||||||||||
|
||||||||||||
private bool _ignoreNextParen; // flag to skip capturing a parentheses group | ||||||||||||
|
||||||||||||
private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span<int> optionSpan) | ||||||||||||
private bool _captureComments; // flag to enable comment capture for source generator | ||||||||||||
stephentoub marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||
private Dictionary<RegexNode, List<string>>? _nodeComments; // side-channel for storing comments associated with nodes | ||||||||||||
private List<string>? _pendingComments; // comments waiting to be associated with the next node | ||||||||||||
|
||||||||||||
private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span<int> optionSpan, bool captureComments = false) | ||||||||||||
{ | ||||||||||||
Debug.Assert(pattern != null, "Pattern must be set"); | ||||||||||||
Debug.Assert(culture != null, "Culture must be set"); | ||||||||||||
|
@@ -79,6 +83,10 @@ private RegexParser(string pattern, RegexOptions options, CultureInfo culture, H | |||||||||||
_capnumlist = null; | ||||||||||||
_capnamelist = null; | ||||||||||||
_ignoreNextParen = false; | ||||||||||||
|
||||||||||||
_captureComments = captureComments; | ||||||||||||
_nodeComments = captureComments ? new Dictionary<RegexNode, List<string>>() : null; | ||||||||||||
_pendingComments = captureComments ? new List<string>() : null; | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// <summary>Gets the culture to use based on the specified options.</summary> | ||||||||||||
|
@@ -102,7 +110,7 @@ public static RegexOptions ParseOptionsInPattern(string pattern, RegexOptions op | |||||||||||
|
||||||||||||
public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture) | ||||||||||||
{ | ||||||||||||
using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]); | ||||||||||||
using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments: false); | ||||||||||||
|
||||||||||||
parser.CountCaptures(out _); | ||||||||||||
parser.Reset(options); | ||||||||||||
|
@@ -133,6 +141,40 @@ public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo | |||||||||||
return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// <summary>Parses a regular expression and captures comments for source generation.</summary> | ||||||||||||
public static RegexTree ParseForSourceGenerator(string pattern, RegexOptions options, CultureInfo culture) | ||||||||||||
stephentoub marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||
{ | ||||||||||||
using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments: true); | ||||||||||||
|
||||||||||||
parser.CountCaptures(out _); | ||||||||||||
parser.Reset(options); | ||||||||||||
RegexNode root = parser.ScanRegex(); | ||||||||||||
|
||||||||||||
int[]? captureNumberList = parser._capnumlist; | ||||||||||||
Hashtable? sparseMapping = parser._caps; | ||||||||||||
int captop = parser._captop; | ||||||||||||
|
||||||||||||
int captureCount; | ||||||||||||
if (captureNumberList == null || captop == captureNumberList.Length) | ||||||||||||
{ | ||||||||||||
// The capture list isn't sparse. Null out the capture mapping as it's not necessary, | ||||||||||||
// and store the number of captures. | ||||||||||||
captureCount = captop; | ||||||||||||
sparseMapping = null; | ||||||||||||
} | ||||||||||||
else | ||||||||||||
{ | ||||||||||||
// The capture list is sparse. Store the number of captures, and populate the number-to-names-list. | ||||||||||||
captureCount = captureNumberList.Length; | ||||||||||||
for (int i = 0; i < captureNumberList.Length; i++) | ||||||||||||
{ | ||||||||||||
sparseMapping[captureNumberList[i]] = i; | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null, parser._nodeComments); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// <summary>This static call constructs a flat concatenation node given a replacement pattern.</summary> | ||||||||||||
public static RegexReplacement ParseReplacement(string pattern, RegexOptions options, Hashtable caps, int capsize, Hashtable capnames) | ||||||||||||
{ | ||||||||||||
|
@@ -330,6 +372,7 @@ private RegexNode ScanRegex() | |||||||||||
if (isQuantifier) | ||||||||||||
{ | ||||||||||||
_unit = RegexNode.CreateOneWithCaseConversion(_pattern[endpos - 1], _options, _culture, ref _caseBehavior); | ||||||||||||
AttachCommentsToNode(_unit); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
|
@@ -345,13 +388,15 @@ private RegexNode ScanRegex() | |||||||||||
{ | ||||||||||||
string setString = ScanCharClass((_options & RegexOptions.IgnoreCase) != 0, scanOnly: false)!.ToStringClass(); | ||||||||||||
_unit = new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, setString); | ||||||||||||
AttachCommentsToNode(_unit); | ||||||||||||
} | ||||||||||||
break; | ||||||||||||
|
||||||||||||
case '(': | ||||||||||||
_optionsStack.Append((int)_options); | ||||||||||||
if (ScanGroupOpen() is RegexNode grouper) | ||||||||||||
{ | ||||||||||||
AttachCommentsToNode(grouper); | ||||||||||||
PushGroup(); | ||||||||||||
StartGroup(grouper); | ||||||||||||
} | ||||||||||||
|
@@ -388,20 +433,27 @@ private RegexNode ScanRegex() | |||||||||||
} | ||||||||||||
|
||||||||||||
_unit = ScanBackslash(scanOnly: false)!; | ||||||||||||
if (_unit is not null) | ||||||||||||
{ | ||||||||||||
AttachCommentsToNode(_unit); | ||||||||||||
} | ||||||||||||
Comment on lines
+403
to
+406
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The null check is unnecessary because
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @copilot apply the suggested fix with adding assert that _unit is not null before calling AttachCommentsToNode. |
||||||||||||
break; | ||||||||||||
|
||||||||||||
case '^': | ||||||||||||
_unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Bol : RegexNodeKind.Beginning, _options); | ||||||||||||
AttachCommentsToNode(_unit); | ||||||||||||
break; | ||||||||||||
|
||||||||||||
case '$': | ||||||||||||
_unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Eol : RegexNodeKind.EndZ, _options); | ||||||||||||
AttachCommentsToNode(_unit); | ||||||||||||
break; | ||||||||||||
|
||||||||||||
case '.': | ||||||||||||
_unit = (_options & RegexOptions.Singleline) != 0 ? | ||||||||||||
new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, RegexCharClass.AnyClass) : | ||||||||||||
new RegexNode(RegexNodeKind.Notone, _options & ~RegexOptions.IgnoreCase, '\n'); | ||||||||||||
AttachCommentsToNode(_unit); | ||||||||||||
break; | ||||||||||||
|
||||||||||||
case '{': | ||||||||||||
|
@@ -1048,21 +1100,41 @@ private void ScanBlank() | |||||||||||
|
||||||||||||
if ((_options & RegexOptions.IgnorePatternWhitespace) != 0 && _pos < _pattern.Length && _pattern[_pos] == '#') | ||||||||||||
{ | ||||||||||||
int commentStart = _pos + 1; // Skip the '#' | ||||||||||||
_pos = _pattern.IndexOf('\n', _pos); | ||||||||||||
if (_pos < 0) | ||||||||||||
{ | ||||||||||||
_pos = _pattern.Length; | ||||||||||||
} | ||||||||||||
|
||||||||||||
if (_captureComments && commentStart < _pos) | ||||||||||||
tarekgh marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||
{ | ||||||||||||
string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); | ||||||||||||
if (!string.IsNullOrEmpty(comment)) | ||||||||||||
|
||||||||||||
{ | ||||||||||||
_pendingComments!.Add(comment); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
else if (_pos + 2 < _pattern.Length && _pattern[_pos + 2] == '#' && _pattern[_pos + 1] == '?' && _pattern[_pos] == '(') | ||||||||||||
{ | ||||||||||||
int commentStart = _pos + 3; // Skip '(?#' | ||||||||||||
_pos = _pattern.IndexOf(')', _pos); | ||||||||||||
if (_pos < 0) | ||||||||||||
{ | ||||||||||||
_pos = _pattern.Length; | ||||||||||||
throw MakeException(RegexParseError.UnterminatedComment, SR.UnterminatedComment); | ||||||||||||
} | ||||||||||||
|
||||||||||||
if (_captureComments && commentStart < _pos) | ||||||||||||
{ | ||||||||||||
string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); | ||||||||||||
if (!string.IsNullOrEmpty(comment)) | ||||||||||||
{ | ||||||||||||
_pendingComments!.Add(comment); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
_pos++; | ||||||||||||
} | ||||||||||||
else | ||||||||||||
|
@@ -1072,6 +1144,22 @@ private void ScanBlank() | |||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// <summary>Attaches any pending comments to the specified node.</summary> | ||||||||||||
private void AttachCommentsToNode(RegexNode node) | ||||||||||||
{ | ||||||||||||
if (_captureComments && _pendingComments!.Count > 0) | ||||||||||||
{ | ||||||||||||
if (!_nodeComments!.TryGetValue(node, out List<string>? comments)) | ||||||||||||
{ | ||||||||||||
comments = new List<string>(); | ||||||||||||
_nodeComments[node] = comments; | ||||||||||||
} | ||||||||||||
|
||||||||||||
comments.AddRange(_pendingComments); | ||||||||||||
_pendingComments.Clear(); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// <summary>Scans chars following a '\' (not counting the '\'), and returns a RegexNode for the type of atom scanned</summary> | ||||||||||||
private RegexNode? ScanBackslash(bool scanOnly) | ||||||||||||
{ | ||||||||||||
|
Uh oh!
There was an error while loading. Please reload this page.