From c9267fa83976aac569920b08460dfe1c4bc9dd42 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Oct 2025 17:26:20 +0000 Subject: [PATCH 1/8] Initial plan From b241d7eab3e6e49cba1a4790bd283b103864d8aa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Oct 2025 18:08:31 +0000 Subject: [PATCH 2/8] Implement comment capture infrastructure for regex source generator - Add comment capture flag and data structures to RegexParser - Modify ScanBlank to capture both # and (?#) style comments when enabled - Add ParseForSourceGenerator method that enables comment capture - Pass node comments through RegexTree to generator - Update generator to emit comments in XML documentation Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../gen/RegexGenerator.Emitter.cs | 10 ++ .../gen/RegexGenerator.cs | 2 +- .../Text/RegularExpressions/RegexParser.cs | 92 ++++++++++++++++++- .../Text/RegularExpressions/RegexTree.cs | 6 +- 4 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 1d23aa697f11fe..c9db970bd8afa7 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -5804,6 +5804,16 @@ RegexNodeKind.BackreferenceConditional when node.Parent.Child(1) == node => "Not string nodeDescription = DescribeNode(node, rm); + // Write out any comments associated with this node. + if (rm.Tree.NodeComments?.TryGetValue(node, out List? comments) == true) + { + string indent = new string(' ', depth * 4); + foreach (string comment in comments) + { + writer.WriteLine($"/// {indent}// {EscapeXmlComment(comment)}
"); + } + } + // Write out the line for the node. const char BulletPoint = '\u25CB'; writer.WriteLine($"/// {new string(' ', depth * 4)}{BulletPoint} {tag}{EscapeXmlComment(nodeDescription)}
"); diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index 98e3f1ac35c036..67f2cd3da2bc23 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -75,7 +75,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context) { try { - RegexTree regexTree = RegexParser.Parse(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture); // make sure Compiled is included to get all optimizations applied to it + RegexTree regexTree = RegexParser.ParseForSourceGenerator(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture); // make sure Compiled is included to get all optimizations applied to it AnalysisResults analysis = RegexTreeAnalyzer.Analyze(regexTree); return new RegexMethod(method.DeclaringType, method.IsProperty, method.DiagnosticLocation, method.MemberName, method.Modifiers, method.NullableRegex, method.Pattern, method.Options, method.MatchTimeout, regexTree, analysis, method.CompilationData); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 7287e39125f1e1..0538917569bb4b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -52,7 +52,11 @@ internal ref struct RegexParser private bool _ignoreNextParen; // flag to skip capturing a parentheses group - private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span optionSpan) + private bool _captureComments; // flag to enable comment capture for source generator + private Dictionary>? _nodeComments; // side-channel for storing comments associated with nodes + private List? _pendingComments; // comments waiting to be associated with the next node + + private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span optionSpan, bool captureComments = false) { Debug.Assert(pattern != null, "Pattern must be set"); Debug.Assert(culture != null, "Culture must be set"); @@ -79,6 +83,10 @@ private RegexParser(string pattern, RegexOptions options, CultureInfo culture, H _capnumlist = null; _capnamelist = null; _ignoreNextParen = false; + + _captureComments = captureComments; + _nodeComments = captureComments ? new Dictionary>() : null; + _pendingComments = captureComments ? new List() : null; } /// Gets the culture to use based on the specified options. @@ -102,7 +110,7 @@ public static RegexOptions ParseOptionsInPattern(string pattern, RegexOptions op public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture) { - using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]); + using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments: false); parser.CountCaptures(out _); parser.Reset(options); @@ -133,6 +141,40 @@ public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null); } + /// Parses a regular expression and captures comments for source generation. + public static RegexTree ParseForSourceGenerator(string pattern, RegexOptions options, CultureInfo culture) + { + using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments: true); + + parser.CountCaptures(out _); + parser.Reset(options); + RegexNode root = parser.ScanRegex(); + + int[]? captureNumberList = parser._capnumlist; + Hashtable? sparseMapping = parser._caps; + int captop = parser._captop; + + int captureCount; + if (captureNumberList == null || captop == captureNumberList.Length) + { + // The capture list isn't sparse. Null out the capture mapping as it's not necessary, + // and store the number of captures. + captureCount = captop; + sparseMapping = null; + } + else + { + // The capture list is sparse. Store the number of captures, and populate the number-to-names-list. + captureCount = captureNumberList.Length; + for (int i = 0; i < captureNumberList.Length; i++) + { + sparseMapping[captureNumberList[i]] = i; + } + } + + return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null, parser._nodeComments); + } + /// This static call constructs a flat concatenation node given a replacement pattern. public static RegexReplacement ParseReplacement(string pattern, RegexOptions options, Hashtable caps, int capsize, Hashtable capnames) { @@ -330,6 +372,7 @@ private RegexNode ScanRegex() if (isQuantifier) { _unit = RegexNode.CreateOneWithCaseConversion(_pattern[endpos - 1], _options, _culture, ref _caseBehavior); + AttachCommentsToNode(_unit); } } @@ -345,6 +388,7 @@ private RegexNode ScanRegex() { string setString = ScanCharClass((_options & RegexOptions.IgnoreCase) != 0, scanOnly: false)!.ToStringClass(); _unit = new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, setString); + AttachCommentsToNode(_unit); } break; @@ -352,6 +396,7 @@ private RegexNode ScanRegex() _optionsStack.Append((int)_options); if (ScanGroupOpen() is RegexNode grouper) { + AttachCommentsToNode(grouper); PushGroup(); StartGroup(grouper); } @@ -388,20 +433,27 @@ private RegexNode ScanRegex() } _unit = ScanBackslash(scanOnly: false)!; + if (_unit is not null) + { + AttachCommentsToNode(_unit); + } break; case '^': _unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Bol : RegexNodeKind.Beginning, _options); + AttachCommentsToNode(_unit); break; case '$': _unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Eol : RegexNodeKind.EndZ, _options); + AttachCommentsToNode(_unit); break; case '.': _unit = (_options & RegexOptions.Singleline) != 0 ? new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, RegexCharClass.AnyClass) : new RegexNode(RegexNodeKind.Notone, _options & ~RegexOptions.IgnoreCase, '\n'); + AttachCommentsToNode(_unit); break; case '{': @@ -1048,14 +1100,25 @@ private void ScanBlank() if ((_options & RegexOptions.IgnorePatternWhitespace) != 0 && _pos < _pattern.Length && _pattern[_pos] == '#') { + int commentStart = _pos + 1; // Skip the '#' _pos = _pattern.IndexOf('\n', _pos); if (_pos < 0) { _pos = _pattern.Length; } + + if (_captureComments && commentStart < _pos) + { + string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); + if (!string.IsNullOrEmpty(comment)) + { + _pendingComments!.Add(comment); + } + } } else if (_pos + 2 < _pattern.Length && _pattern[_pos + 2] == '#' && _pattern[_pos + 1] == '?' && _pattern[_pos] == '(') { + int commentStart = _pos + 3; // Skip '(?#' _pos = _pattern.IndexOf(')', _pos); if (_pos < 0) { @@ -1063,6 +1126,15 @@ private void ScanBlank() throw MakeException(RegexParseError.UnterminatedComment, SR.UnterminatedComment); } + if (_captureComments && commentStart < _pos) + { + string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); + if (!string.IsNullOrEmpty(comment)) + { + _pendingComments!.Add(comment); + } + } + _pos++; } else @@ -1072,6 +1144,22 @@ private void ScanBlank() } } + /// Attaches any pending comments to the specified node. + private void AttachCommentsToNode(RegexNode node) + { + if (_captureComments && _pendingComments!.Count > 0) + { + if (!_nodeComments!.TryGetValue(node, out List? comments)) + { + comments = new List(); + _nodeComments[node] = comments; + } + + comments.AddRange(_pendingComments); + _pendingComments.Clear(); + } + } + /// Scans chars following a '\' (not counting the '\'), and returns a RegexNode for the type of atom scanned private RegexNode? ScanBackslash(bool scanOnly) { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs index 8a827001cc2f98..1510be31c2b21b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections; +using System.Collections.Generic; using System.Diagnostics; using System.Globalization; @@ -39,8 +40,10 @@ internal sealed class RegexTree /// capture group number and the value is the index into for that capture group. /// public readonly Hashtable? CaptureNumberSparseMapping; + /// A mapping of RegexNode to its associated comments from the pattern (for source generator use only). + public readonly Dictionary>? NodeComments; - internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture) + internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture, Dictionary>? nodeComments = null) { #if DEBUG // Asserts to both demonstrate and validate the relationships between the various capture data structures. @@ -77,6 +80,7 @@ internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Has CaptureNameToNumberMapping = captureNameToNumberMapping; CaptureNames = captureNames; Options = options; + NodeComments = nodeComments; FindOptimizations = RegexFindOptimizations.Create(root, options); } } From e6a2bd2a6f02fc4a61effb1aea1486e3f45e93cb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Oct 2025 18:16:53 +0000 Subject: [PATCH 3/8] Complete regex comment propagation implementation - Capture both # and (?#) style comments when parsing for source generator - Store comments in side-channel dictionary (Dictionary>) - Attach comments to nodes as they are created during parsing - Pass comments through RegexTree with internal field - Emit comments in generator output before node descriptions - Add InternalsVisibleTo for test access - Add unit tests for comment capture functionality Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../src/AssemblyInfo.cs | 6 +++ .../Text/RegularExpressions/RegexTree.cs | 2 +- .../RegexParserTests.netcoreapp.cs | 53 +++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs diff --git a/src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs b/src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs new file mode 100644 index 00000000000000..2911955730ce0e --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs @@ -0,0 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("System.Text.RegularExpressions.Tests")] diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs index 1510be31c2b21b..25f9d8446f6cad 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs @@ -41,7 +41,7 @@ internal sealed class RegexTree /// public readonly Hashtable? CaptureNumberSparseMapping; /// A mapping of RegexNode to its associated comments from the pattern (for source generator use only). - public readonly Dictionary>? NodeComments; + internal readonly Dictionary>? NodeComments; internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture, Dictionary>? nodeComments = null) { diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs index a3e02e8439e2a9..aa4dbb1e915f99 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs @@ -312,5 +312,58 @@ static partial void MayThrow(Action action) throw new XunitException($"Expected RegexParseException or no exception -> Actual: ({e})"); } } + + [Fact] + public void ParseForSourceGenerator_CapturesComments() + { + // Test with (?x) inline option and # comments + string pattern = @"(?x) + ^ # Start of line + \w+ # Word characters + "; + + RegexTree tree = RegexParser.ParseForSourceGenerator(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture); + + Assert.NotNull(tree.NodeComments); + Assert.NotEmpty(tree.NodeComments); + + // Verify some comments were captured + bool foundComment = false; + foreach (var kvp in tree.NodeComments) + { + if (kvp.Value.Count > 0) + { + foundComment = true; + break; + } + } + Assert.True(foundComment, "Expected to find at least one comment"); + } + + [Fact] + public void ParseForSourceGenerator_CapturesInlineComments() + { + // Test with (?# ) inline comments + string pattern = @"(?#This is a comment)abc(?#Another comment)"; + + RegexTree tree = RegexParser.ParseForSourceGenerator(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture); + + Assert.NotNull(tree.NodeComments); + Assert.NotEmpty(tree.NodeComments); + } + + [Fact] + public void Parse_DoesNotCaptureComments() + { + // Test that regular Parse doesn't capture comments + string pattern = @"(?x) + ^ # Start of line + \w+ # Word characters + "; + + RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture); + + Assert.Null(tree.NodeComments); + } } } From 1c76030b18338b9fbd98a1a34750cb4411064cfb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Oct 2025 01:34:06 +0000 Subject: [PATCH 4/8] Address PR feedback: simplify implementation - Remove InternalsVisibleTo attribute and AssemblyInfo.cs - Merge ParseForSourceGenerator into Parse with captureComments parameter - Remove _captureComments field, use null checks on _pendingComments instead - Update tests to use Parse(captureComments: true) - Update generator to call Parse(captureComments: true) Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../gen/RegexGenerator.cs | 2 +- .../src/AssemblyInfo.cs | 6 -- .../Text/RegularExpressions/RegexParser.cs | 57 ++++--------------- .../RegexParserTests.netcoreapp.cs | 12 ++-- 4 files changed, 19 insertions(+), 58 deletions(-) delete mode 100644 src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index 67f2cd3da2bc23..48653b79d4a341 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -75,7 +75,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context) { try { - RegexTree regexTree = RegexParser.ParseForSourceGenerator(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture); // make sure Compiled is included to get all optimizations applied to it + RegexTree regexTree = RegexParser.Parse(method.Pattern, method.Options | RegexOptions.Compiled, method.Culture, captureComments: true); // make sure Compiled is included to get all optimizations applied to it AnalysisResults analysis = RegexTreeAnalyzer.Analyze(regexTree); return new RegexMethod(method.DeclaringType, method.IsProperty, method.DiagnosticLocation, method.MemberName, method.Modifiers, method.NullableRegex, method.Pattern, method.Options, method.MatchTimeout, regexTree, analysis, method.CompilationData); } diff --git a/src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs b/src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs deleted file mode 100644 index 2911955730ce0e..00000000000000 --- a/src/libraries/System.Text.RegularExpressions/src/AssemblyInfo.cs +++ /dev/null @@ -1,6 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Runtime.CompilerServices; - -[assembly: InternalsVisibleTo("System.Text.RegularExpressions.Tests")] diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 0538917569bb4b..c1f5fd22df0c95 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -52,7 +52,6 @@ internal ref struct RegexParser private bool _ignoreNextParen; // flag to skip capturing a parentheses group - private bool _captureComments; // flag to enable comment capture for source generator private Dictionary>? _nodeComments; // side-channel for storing comments associated with nodes private List? _pendingComments; // comments waiting to be associated with the next node @@ -84,9 +83,11 @@ private RegexParser(string pattern, RegexOptions options, CultureInfo culture, H _capnamelist = null; _ignoreNextParen = false; - _captureComments = captureComments; - _nodeComments = captureComments ? new Dictionary>() : null; - _pendingComments = captureComments ? new List() : null; + if (captureComments) + { + _nodeComments = new Dictionary>(); + _pendingComments = new List(); + } } /// Gets the culture to use based on the specified options. @@ -108,43 +109,9 @@ public static RegexOptions ParseOptionsInPattern(string pattern, RegexOptions op return foundOptionsInPattern; } - public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture) - { - using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments: false); - - parser.CountCaptures(out _); - parser.Reset(options); - RegexNode root = parser.ScanRegex(); - - int[]? captureNumberList = parser._capnumlist; - Hashtable? sparseMapping = parser._caps; - int captop = parser._captop; - - int captureCount; - if (captureNumberList == null || captop == captureNumberList.Length) - { - // The capture list isn't sparse. Null out the capture mapping as it's not necessary, - // and store the number of captures. - captureCount = captop; - sparseMapping = null; - } - else - { - // The capture list is sparse. Store the number of captures, and populate the number-to-names-list. - captureCount = captureNumberList.Length; - for (int i = 0; i < captureNumberList.Length; i++) - { - sparseMapping[captureNumberList[i]] = i; - } - } - - return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null); - } - - /// Parses a regular expression and captures comments for source generation. - public static RegexTree ParseForSourceGenerator(string pattern, RegexOptions options, CultureInfo culture) + public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture, bool captureComments = false) { - using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments: true); + using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize], captureComments); parser.CountCaptures(out _); parser.Reset(options); @@ -1107,12 +1074,12 @@ private void ScanBlank() _pos = _pattern.Length; } - if (_captureComments && commentStart < _pos) + if (_pendingComments is not null && commentStart < _pos) { string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); if (!string.IsNullOrEmpty(comment)) { - _pendingComments!.Add(comment); + _pendingComments.Add(comment); } } } @@ -1126,12 +1093,12 @@ private void ScanBlank() throw MakeException(RegexParseError.UnterminatedComment, SR.UnterminatedComment); } - if (_captureComments && commentStart < _pos) + if (_pendingComments is not null && commentStart < _pos) { string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); if (!string.IsNullOrEmpty(comment)) { - _pendingComments!.Add(comment); + _pendingComments.Add(comment); } } @@ -1147,7 +1114,7 @@ private void ScanBlank() /// Attaches any pending comments to the specified node. private void AttachCommentsToNode(RegexNode node) { - if (_captureComments && _pendingComments!.Count > 0) + if (_pendingComments is not null && _pendingComments.Count > 0) { if (!_nodeComments!.TryGetValue(node, out List? comments)) { diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs index aa4dbb1e915f99..bd928197c9daf3 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs @@ -314,7 +314,7 @@ static partial void MayThrow(Action action) } [Fact] - public void ParseForSourceGenerator_CapturesComments() + public void Parse_CapturesComments() { // Test with (?x) inline option and # comments string pattern = @"(?x) @@ -322,7 +322,7 @@ public void ParseForSourceGenerator_CapturesComments() \w+ # Word characters "; - RegexTree tree = RegexParser.ParseForSourceGenerator(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture); + RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture, captureComments: true); Assert.NotNull(tree.NodeComments); Assert.NotEmpty(tree.NodeComments); @@ -341,21 +341,21 @@ public void ParseForSourceGenerator_CapturesComments() } [Fact] - public void ParseForSourceGenerator_CapturesInlineComments() + public void Parse_CapturesInlineComments() { // Test with (?# ) inline comments string pattern = @"(?#This is a comment)abc(?#Another comment)"; - RegexTree tree = RegexParser.ParseForSourceGenerator(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture); + RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture, captureComments: true); Assert.NotNull(tree.NodeComments); Assert.NotEmpty(tree.NodeComments); } [Fact] - public void Parse_DoesNotCaptureComments() + public void Parse_DoesNotCaptureCommentsByDefault() { - // Test that regular Parse doesn't capture comments + // Test that regular Parse doesn't capture comments by default string pattern = @"(?x) ^ # Start of line \w+ # Word characters From e51247183d42e51d64fb5dcea55bcc7ea68a837c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Oct 2025 19:58:17 +0000 Subject: [PATCH 5/8] Fix build failures: remove internal type tests from FunctionalTests Tests that access internal types (RegexTree, RegexParser) have been removed from the FunctionalTests project. The comment capture feature can be validated through the generated source code output. Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../RegexParserTests.netcoreapp.cs | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs index bd928197c9daf3..a3e02e8439e2a9 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs @@ -312,58 +312,5 @@ static partial void MayThrow(Action action) throw new XunitException($"Expected RegexParseException or no exception -> Actual: ({e})"); } } - - [Fact] - public void Parse_CapturesComments() - { - // Test with (?x) inline option and # comments - string pattern = @"(?x) - ^ # Start of line - \w+ # Word characters - "; - - RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture, captureComments: true); - - Assert.NotNull(tree.NodeComments); - Assert.NotEmpty(tree.NodeComments); - - // Verify some comments were captured - bool foundComment = false; - foreach (var kvp in tree.NodeComments) - { - if (kvp.Value.Count > 0) - { - foundComment = true; - break; - } - } - Assert.True(foundComment, "Expected to find at least one comment"); - } - - [Fact] - public void Parse_CapturesInlineComments() - { - // Test with (?# ) inline comments - string pattern = @"(?#This is a comment)abc(?#Another comment)"; - - RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture, captureComments: true); - - Assert.NotNull(tree.NodeComments); - Assert.NotEmpty(tree.NodeComments); - } - - [Fact] - public void Parse_DoesNotCaptureCommentsByDefault() - { - // Test that regular Parse doesn't capture comments by default - string pattern = @"(?x) - ^ # Start of line - \w+ # Word characters - "; - - RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, System.Globalization.CultureInfo.InvariantCulture); - - Assert.Null(tree.NodeComments); - } } } From 56bba803d271f76914e2d24532394b899ee581d5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 19:24:51 +0000 Subject: [PATCH 6/8] Use 'is true' instead of '== true' for better C# style Co-authored-by: tarekgh <10833894+tarekgh@users.noreply.github.com> --- .../gen/RegexGenerator.Emitter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index c9db970bd8afa7..56ef489c6cec05 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -5805,7 +5805,7 @@ RegexNodeKind.BackreferenceConditional when node.Parent.Child(1) == node => "Not string nodeDescription = DescribeNode(node, rm); // Write out any comments associated with this node. - if (rm.Tree.NodeComments?.TryGetValue(node, out List? comments) == true) + if (rm.Tree.NodeComments?.TryGetValue(node, out List? comments) is true) { string indent = new string(' ', depth * 4); foreach (string comment in comments) From b5e1e5066cce49a0df32c02d9c03c0821c224b10 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 22:41:42 +0000 Subject: [PATCH 7/8] Handle multi-line comments to maintain proper alignment Split comments on newlines and trim each line to ensure proper alignment in the generated XML documentation. Co-authored-by: tarekgh <10833894+tarekgh@users.noreply.github.com> --- .../gen/RegexGenerator.Emitter.cs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 56ef489c6cec05..2796051b622d2a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -5810,7 +5810,16 @@ RegexNodeKind.BackreferenceConditional when node.Parent.Child(1) == node => "Not string indent = new string(' ', depth * 4); foreach (string comment in comments) { - writer.WriteLine($"/// {indent}// {EscapeXmlComment(comment)}
"); + // Split multi-line comments to maintain proper alignment + string[] lines = comment.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); + foreach (string line in lines) + { + string trimmedLine = line.Trim(); + if (!string.IsNullOrEmpty(trimmedLine)) + { + writer.WriteLine($"/// {indent}// {EscapeXmlComment(trimmedLine)}
"); + } + } } } From fad8a2301b3b6c6d1c12aafc91c0b6b501cb5c6e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 23:07:45 +0000 Subject: [PATCH 8/8] Preserve empty comments and add test for comment propagation - Remove empty comment filtering to preserve visual separation in patterns - Add test case ValidateCommentsInGeneratedCode to verify comments appear in generated code - Empty comments (# or (?#)) are now preserved for better formatting Co-authored-by: tarekgh <10833894+tarekgh@users.noreply.github.com> --- .../gen/RegexGenerator.Emitter.cs | 5 +--- .../Text/RegularExpressions/RegexParser.cs | 12 ++++------ .../RegexGeneratorOutputTests.cs | 24 +++++++++++++++++++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 2796051b622d2a..08a41b20b60133 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -5815,10 +5815,7 @@ RegexNodeKind.BackreferenceConditional when node.Parent.Child(1) == node => "Not foreach (string line in lines) { string trimmedLine = line.Trim(); - if (!string.IsNullOrEmpty(trimmedLine)) - { - writer.WriteLine($"/// {indent}// {EscapeXmlComment(trimmedLine)}
"); - } + writer.WriteLine($"/// {indent}// {EscapeXmlComment(trimmedLine)}
"); } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index c1f5fd22df0c95..12929c5b0f7f05 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -1077,10 +1077,8 @@ private void ScanBlank() if (_pendingComments is not null && commentStart < _pos) { string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); - if (!string.IsNullOrEmpty(comment)) - { - _pendingComments.Add(comment); - } + // Preserve even empty comments for visual separation + _pendingComments.Add(comment); } } else if (_pos + 2 < _pattern.Length && _pattern[_pos + 2] == '#' && _pattern[_pos + 1] == '?' && _pattern[_pos] == '(') @@ -1096,10 +1094,8 @@ private void ScanBlank() if (_pendingComments is not null && commentStart < _pos) { string comment = _pattern.Substring(commentStart, _pos - commentStart).Trim(); - if (!string.IsNullOrEmpty(comment)) - { - _pendingComments.Add(comment); - } + // Preserve even empty comments for visual separation + _pendingComments.Add(comment); } _pos++; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs index 3792e4704a1af6..12a1b633463a81 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs @@ -1193,5 +1193,29 @@ partial class C // The actual pattern string should properly escape the newline for C# Assert.Contains("base.pattern = \"\\n\";", actual); } + + [Fact] + public async Task ValidateCommentsInGeneratedCode() + { + string program = """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex(@"(?x) + ^ # Start of line + \w+ # Word characters + $ # End of line + ")] + public static partial Regex WithComments(); + } + """; + + string actual = await RegexGeneratorHelper.GenerateSourceText(program, allowUnsafe: true, checkOverflow: false); + + // Verify comments appear in the explanation section + Assert.Contains("// Start of line", actual); + Assert.Contains("// Word characters", actual); + Assert.Contains("// End of line", actual); + } } }