diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 1d23aa697f11fe..08a41b20b60133 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -5804,6 +5804,22 @@ RegexNodeKind.BackreferenceConditional when node.Parent.Child(1) == node => "Not string nodeDescription = DescribeNode(node, rm); + // Write out any comments associated with this node. + if (rm.Tree.NodeComments?.TryGetValue(node, out List? comments) is true) + { + string indent = new string(' ', depth * 4); + foreach (string comment in comments) + { + // Split multi-line comments to maintain proper alignment + string[] lines = comment.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); + foreach (string line in lines) + { + string trimmedLine = line.Trim(); + writer.WriteLine($"/// {indent}// {EscapeXmlComment(trimmedLine)}
"); + } + } + } + // Write out the line for the node. const char BulletPoint = '\u25CB'; writer.WriteLine($"/// {new string(' ', depth * 4)}{BulletPoint} {tag}{EscapeXmlComment(nodeDescription)}
"); diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index 98e3f1ac35c036..48653b79d4a341 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -75,7 +75,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context) { try { - RegexTree regexTree = RegexParser.Parse(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture); // make sure Compiled is included to get all optimizations applied to it + RegexTree regexTree = RegexParser.Parse(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture, captureComments: true); // make sure Compiled is included to get all optimizations applied to it AnalysisResults analysis = RegexTreeAnalyzer.Analyze(regexTree); return new RegexMethod(method.DeclaringType, method.IsProperty, method.DiagnosticLocation, method.MemberName, method.Modifiers, method.NullableRegex, method.Pattern, method.Options, method.MatchTimeout, regexTree, analysis, method.CompilationData); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 7287e39125f1e1..12929c5b0f7f05 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -52,7 +52,10 @@ internal ref struct RegexParser private bool _ignoreNextParen; // flag to skip capturing a parentheses group - private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span optionSpan) + private Dictionary>? _nodeComments; // side-channel for storing comments associated with nodes + private List? _pendingComments; // comments waiting to be associated with the next node + + private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span optionSpan, bool captureComments = false) { Debug.Assert(pattern != null, "Pattern must be set"); Debug.Assert(culture != null, "Culture must be set"); @@ -79,6 +82,12 @@ private RegexParser(string pattern, RegexOptions options, CultureInfo culture, H _capnumlist = null; _capnamelist = null; _ignoreNextParen = false; + + if (captureComments) + { + _nodeComments = new Dictionary>(); + _pendingComments = new List(); + } } /// Gets the culture to use based on the specified options. @@ -100,9 +109,9 @@ public static RegexOptions ParseOptionsInPattern(string pattern, RegexOptions op return foundOptionsInPattern; } - public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture) + public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture, bool captureComments = false) { - using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]); + using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments); parser.CountCaptures(out _); parser.Reset(options); @@ -130,7 +139,7 @@ public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo } } - return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null); + return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null, parser._nodeComments); } /// This static call constructs a flat concatenation node given a replacement pattern. @@ -330,6 +339,7 @@ private RegexNode ScanRegex() if (isQuantifier) { _unit = RegexNode.CreateOneWithCaseConversion(_pattern[endpos - 1], _options, _culture, ref _caseBehavior); + AttachCommentsToNode(_unit); } } @@ -345,6 +355,7 @@ private RegexNode ScanRegex() { string setString = ScanCharClass((_options & RegexOptions.IgnoreCase) != 0, scanOnly: false)!.ToStringClass(); _unit = new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, setString); + AttachCommentsToNode(_unit); } break; @@ -352,6 +363,7 @@ private RegexNode ScanRegex() _optionsStack.Append((int)_options); if (ScanGroupOpen() is RegexNode grouper) { + AttachCommentsToNode(grouper); PushGroup(); StartGroup(grouper); } @@ -388,20 +400,27 @@ private RegexNode ScanRegex() } _unit = ScanBackslash(scanOnly: false)!; + if (_unit is not null) + { + AttachCommentsToNode(_unit); + } break; case '^': _unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Bol : RegexNodeKind.Beginning, _options); + AttachCommentsToNode(_unit); break; case '$': _unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Eol : RegexNodeKind.EndZ, _options); + AttachCommentsToNode(_unit); break; case '.': _unit = (_options & RegexOptions.Singleline) != 0 ? new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, RegexCharClass.AnyClass) : new RegexNode(RegexNodeKind.Notone, _options & ~RegexOptions.IgnoreCase, '\n'); + AttachCommentsToNode(_unit); break; case '{': @@ -1048,14 +1067,23 @@ private void ScanBlank() if ((_options & RegexOptions.IgnorePatternWhitespace) != 0 && _pos < _pattern.Length && _pattern[_pos] == '#') { + int commentStart = _pos + 1; // Skip the '#' _pos = _pattern.IndexOf('\n', _pos); if (_pos < 0) { _pos = _pattern.Length; } + + if (_pendingComments is not null && commentStart < _pos) + { + string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); + // Preserve even empty comments for visual separation + _pendingComments.Add(comment); + } } else if (_pos + 2 < _pattern.Length && _pattern[_pos + 2] == '#' && _pattern[_pos + 1] == '?' && _pattern[_pos] == '(') { + int commentStart = _pos + 3; // Skip '(?#' _pos = _pattern.IndexOf(')', _pos); if (_pos < 0) { @@ -1063,6 +1091,13 @@ private void ScanBlank() throw MakeException(RegexParseError.UnterminatedComment, SR.UnterminatedComment); } + if (_pendingComments is not null && commentStart < _pos) + { + string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); + // Preserve even empty comments for visual separation + _pendingComments.Add(comment); + } + _pos++; } else @@ -1072,6 +1107,22 @@ private void ScanBlank() } } + /// Attaches any pending comments to the specified node. + private void AttachCommentsToNode(RegexNode node) + { + if (_pendingComments is not null && _pendingComments.Count > 0) + { + if (!_nodeComments!.TryGetValue(node, out List? comments)) + { + comments = new List(); + _nodeComments[node] = comments; + } + + comments.AddRange(_pendingComments); + _pendingComments.Clear(); + } + } + /// Scans chars following a '\' (not counting the '\'), and returns a RegexNode for the type of atom scanned private RegexNode? ScanBackslash(bool scanOnly) { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs index 8a827001cc2f98..25f9d8446f6cad 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections; +using System.Collections.Generic; using System.Diagnostics; using System.Globalization; @@ -39,8 +40,10 @@ internal sealed class RegexTree /// capture group number and the value is the index into for that capture group. /// public readonly Hashtable? CaptureNumberSparseMapping; + /// A mapping of RegexNode to its associated comments from the pattern (for source generator use only). + internal readonly Dictionary>? NodeComments; - internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture) + internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture, Dictionary>? nodeComments = null) { #if DEBUG // Asserts to both demonstrate and validate the relationships between the various capture data structures. @@ -77,6 +80,7 @@ internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Has CaptureNameToNumberMapping = captureNameToNumberMapping; CaptureNames = captureNames; Options = options; + NodeComments = nodeComments; FindOptimizations = RegexFindOptimizations.Create(root, options); } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs index 3792e4704a1af6..12a1b633463a81 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs @@ -1193,5 +1193,29 @@ partial class C // The actual pattern string should properly escape the newline for C# Assert.Contains("base.pattern = \"\\n\";", actual); } + + [Fact] + public async Task ValidateCommentsInGeneratedCode() + { + string program = """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex(@"(?x) + ^ # Start of line + \w+ # Word characters + $ # End of line + ")] + public static partial Regex WithComments(); + } + """; + + string actual = await RegexGeneratorHelper.GenerateSourceText(program, allowUnsafe: true, checkOverflow: false); + + // Verify comments appear in the explanation section + Assert.Contains("// Start of line", actual); + Assert.Contains("// Word characters", actual); + Assert.Contains("// End of line", actual); + } } }