From 6af8de06203039e7c75d8425129d988e3c7b35b1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Jun 2025 23:15:28 +0000
Subject: [PATCH 1/4] Initial plan for issue


From 23278b9df152b48c8b2c71f8939a1a217a877d1b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Jun 2025 23:20:18 +0000
Subject: [PATCH 2/4] Add failing test to reproduce TextChunker newline
 splitting issue

Co-authored-by: shethaadit <10839617+shethaadit@users.noreply.github.com>
---
 dotnet/global.json                              |  2 +-
 .../Text/TextChunkerTests.cs                    | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/dotnet/global.json b/dotnet/global.json
index e00bec377198..43376b545c46 100644
--- a/dotnet/global.json
+++ b/dotnet/global.json
@@ -1,6 +1,6 @@
 {
   "sdk": {
-    "version": "9.0.300",
+    "version": "8.0.116",
     "rollForward": "latestMajor",
     "allowPrerelease": false
   }
diff --git a/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs b/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
index 807282a2778a..a114c52842eb 100644
--- a/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
+++ b/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
@@ -777,4 +777,21 @@ public void CanSplitTextParagraphsWithOverlapAndHeaderAndCustomTokenCounter()
 
         Assert.Equal(expected, result);
     }
+
+    [Theory]
+    [InlineData("First line\r\nSecond line\r\nThird line")]
+    [InlineData("First line\nSecond line\nThird line")]
+    public void ActuallySplitsOnNewLines(string input)
+    {
+        var result = TextChunker.SplitPlainTextLines(input, 100);
+
+        var expected = new[]
+        {
+            "First line",
+            "Second line", 
+            "Third line"
+        };
+
+        Assert.Equal(expected, result); // Currently fails
+    }
 }

From 33ad4a8ec0ca46572557e5bcbb69715b928ae85e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Jun 2025 23:27:04 +0000
Subject: [PATCH 3/4] Fix SplitPlainTextLines to split on newlines when token
 count is low

Co-authored-by: shethaadit <10839617+shethaadit@users.noreply.github.com>
---
 .../SemanticKernel.Core/Text/TextChunker.cs   | 35 +++++++++++++++++--
 .../Text/TextChunkerTests.cs                  | 23 +++++++++++-
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs b/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
index 333528bf5e50..96342c958a49 100644
--- a/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
+++ b/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
@@ -52,7 +52,7 @@ private sealed class StringListWithTokenCount(TextChunker.TokenCounter? tokenCou
     public delegate int TokenCounter(string input);
 
     private static readonly char[] s_spaceChar = [' '];
-    private static readonly string?[] s_plaintextSplitOptions = ["\n\r", ".。．", "?!", ";", ":", ",，、", ")]}", " ", "-", null];
+    private static readonly string?[] s_plaintextSplitOptions = ["\n", ".。．", "?!", ";", ":", ",，、", ")]}", " ", "-", null];
     private static readonly string?[] s_markdownSplitOptions = [".\u3002\uFF0E", "?!", ";", ":", ",\uFF0C\u3001", ")]}", " ", "-", "\n\r", null];
 
     /// <summary>
@@ -62,8 +62,37 @@ private sealed class StringListWithTokenCount(TextChunker.TokenCounter? tokenCou
     /// <param name="maxTokensPerLine">Maximum number of tokens per line.</param>
     /// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
     /// <returns>List of lines.</returns>
-    public static List<string> SplitPlainTextLines(string text, int maxTokensPerLine, TokenCounter? tokenCounter = null) =>
-        InternalSplitLines(text, maxTokensPerLine, trim: true, s_plaintextSplitOptions, tokenCounter);
+    public static List<string> SplitPlainTextLines(string text, int maxTokensPerLine, TokenCounter? tokenCounter = null)
+    {
+        if (string.IsNullOrEmpty(text))
+        {
+            return [];
+        }
+
+        // Normalize line endings first
+        text = text.Replace("\r\n", "\n");
+
+        // Check if the text contains newlines and the total token count is within limits
+        var totalTokenCount = GetTokenCount(text, tokenCounter);
+        if (text.Contains('\n') && totalTokenCount <= maxTokensPerLine)
+        {
+            // Split on newlines regardless of token count, as the method name suggests line-based splitting
+            var lines = text.Split('\n', StringSplitOptions.RemoveEmptyEntries);
+            var result = new List<string>();
+            foreach (var line in lines)
+            {
+                var trimmedLine = line.Trim();
+                if (!string.IsNullOrEmpty(trimmedLine))
+                {
+                    result.Add(trimmedLine);
+                }
+            }
+            return result;
+        }
+
+        // Fall back to the original hierarchical splitting for long text or text without newlines
+        return InternalSplitLines(text, maxTokensPerLine, trim: true, s_plaintextSplitOptions, tokenCounter);
+    }
 
     /// <summary>
     /// Split markdown text into lines.
diff --git a/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs b/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
index a114c52842eb..0deb64d379a9 100644
--- a/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
+++ b/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
@@ -788,10 +788,31 @@ public void ActuallySplitsOnNewLines(string input)
         var expected = new[]
         {
             "First line",
-            "Second line", 
+            "Second line",
             "Third line"
         };
 
         Assert.Equal(expected, result); // Currently fails
     }
+
+    [Fact]
+    public void SplitPlainTextLinesHandlesEdgeCases()
+    {
+        // Test with low token limit to ensure hierarchical splitting still works
+        var result1 = TextChunker.SplitPlainTextLines("This is a very long line without any newlines that should be split using hierarchical approach", 5);
+        Assert.True(result1.Count > 1, "Long line without newlines should be split hierarchically");
+
+        // Test with high token limit and newlines - should split on newlines
+        var result2 = TextChunker.SplitPlainTextLines("Short line\nAnother short line", 100);
+        Assert.Equal(new[] { "Short line", "Another short line" }, result2);
+
+        // Test empty string
+        var result3 = TextChunker.SplitPlainTextLines("", 10);
+        Assert.Empty(result3);
+
+        // Test single line without newlines
+        var result4 = TextChunker.SplitPlainTextLines("Single line without newlines", 100);
+        Assert.Single(result4);
+        Assert.Equal("Single line without newlines", result4[0]);
+    }
 }

From 7bdd7b6a499823124768786d96f0bc6ec45fb937 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Jun 2025 23:31:59 +0000
Subject: [PATCH 4/4] Complete TextChunker fix with comprehensive tests and
 backward compatibility

Co-authored-by: shethaadit <10839617+shethaadit@users.noreply.github.com>
---
 dotnet/global.json                            |  2 +-
 .../SemanticKernel.Core/Text/TextChunker.cs   |  7 ++++---
 .../Text/TextChunkerTests.cs                  | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/dotnet/global.json b/dotnet/global.json
index 43376b545c46..e00bec377198 100644
--- a/dotnet/global.json
+++ b/dotnet/global.json
@@ -1,6 +1,6 @@
 {
   "sdk": {
-    "version": "8.0.116",
+    "version": "9.0.300",
     "rollForward": "latestMajor",
     "allowPrerelease": false
   }
diff --git a/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs b/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
index 96342c958a49..90b600584e4d 100644
--- a/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
+++ b/dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
@@ -52,7 +52,7 @@ private sealed class StringListWithTokenCount(TextChunker.TokenCounter? tokenCou
     public delegate int TokenCounter(string input);
 
     private static readonly char[] s_spaceChar = [' '];
-    private static readonly string?[] s_plaintextSplitOptions = ["\n", ".。．", "?!", ";", ":", ",，、", ")]}", " ", "-", null];
+    private static readonly string?[] s_plaintextSplitOptions = ["\n\r", ".。．", "?!", ";", ":", ",，、", ")]}", " ", "-", null];
     private static readonly string?[] s_markdownSplitOptions = [".\u3002\uFF0E", "?!", ";", ":", ",\uFF0C\u3001", ")]}", " ", "-", "\n\r", null];
 
     /// <summary>
@@ -74,10 +74,11 @@ public static List<string> SplitPlainTextLines(string text, int maxTokensPerLine
 
         // Check if the text contains newlines and the total token count is within limits
         var totalTokenCount = GetTokenCount(text, tokenCounter);
-        if (text.Contains('\n') && totalTokenCount <= maxTokensPerLine)
+        if ((text.Contains('\n') || text.Contains('\r')) && totalTokenCount <= maxTokensPerLine)
         {
             // Split on newlines regardless of token count, as the method name suggests line-based splitting
-            var lines = text.Split('\n', StringSplitOptions.RemoveEmptyEntries);
+            char[] separators = ['\n', '\r'];
+            var lines = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);
             var result = new List<string>();
             foreach (var line in lines)
             {
diff --git a/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs b/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
index 0deb64d379a9..294bf2be21d2 100644
--- a/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
+++ b/dotnet/src/SemanticKernel.UnitTests/Text/TextChunkerTests.cs
@@ -2,6 +2,7 @@
 
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Text;
 using Microsoft.SemanticKernel.Text;
 using Xunit;
@@ -815,4 +816,22 @@ public void SplitPlainTextLinesHandlesEdgeCases()
         Assert.Single(result4);
         Assert.Equal("Single line without newlines", result4[0]);
     }
+
+    [Fact]
+    public void SplitPlainTextLinesWorksWithMixedLineEndings()
+    {
+        // Test mixed line endings with different combinations
+        var result1 = TextChunker.SplitPlainTextLines("Line1\r\nLine2\nLine3\rLine4", 100);
+        Assert.Equal(new[] { "Line1", "Line2", "Line3", "Line4" }, result1);
+
+        // Test that token limits still work when splitting on newlines
+        var result2 = TextChunker.SplitPlainTextLines("Short\nLine", 2);
+        Assert.Equal(new[] { "Short", "Line" }, result2);
+
+        // Test that very long text still uses hierarchical splitting
+        var longText = string.Join("", Enumerable.Repeat("word ", 1000));
+        var result3 = TextChunker.SplitPlainTextLines(longText, 10);
+        Assert.True(result3.Count > 1, "Long text should be split hierarchically");
+        Assert.True(result3.All(s => !s.Contains('\n')), "Hierarchical split should not contain newlines");
+    }
 }