From 8393c506122c239abc0c17fa3a26360b60bb4cea Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 16:51:27 +0000 Subject: [PATCH 1/2] Fix #347: Preserve HTML entities like © during processing Co-Authored-By: m@martinnormark.com --- ...reserveEntitiesHtmlMarkupFormatterTests.cs | 30 ++++ .../PreserveEntitiesTests.cs | 20 +++ .../PreserveEntitiesHtmlMarkupFormatter.cs | 40 +++++ PreMailer.Net/PreMailer.Net/PreMailer.cs | 170 ++++++++++-------- 4 files changed, 184 insertions(+), 76 deletions(-) create mode 100644 PreMailer.Net/PreMailer.Net.Tests/Html/PreserveEntitiesHtmlMarkupFormatterTests.cs create mode 100644 PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs create mode 100644 PreMailer.Net/PreMailer.Net/Html/PreserveEntitiesHtmlMarkupFormatter.cs diff --git a/PreMailer.Net/PreMailer.Net.Tests/Html/PreserveEntitiesHtmlMarkupFormatterTests.cs b/PreMailer.Net/PreMailer.Net.Tests/Html/PreserveEntitiesHtmlMarkupFormatterTests.cs new file mode 100644 index 0000000..068b06a --- /dev/null +++ b/PreMailer.Net/PreMailer.Net.Tests/Html/PreserveEntitiesHtmlMarkupFormatterTests.cs @@ -0,0 +1,30 @@ +using AngleSharp.Dom; +using AngleSharp.Html.Parser; +using PreMailer.Net.Html; +using System; +using System.IO; +using Xunit; + +namespace PreMailer.Net.Tests.Html +{ + public class PreserveEntitiesHtmlMarkupFormatterTests + { + [Fact] + public void Text_GivenHtmlWithCopyEntity_PreservesEntity() + { + string html = "

&copy; 2025

"; + var document = new HtmlParser().ParseDocument(html); + var formatter = PreserveEntitiesHtmlMarkupFormatter.Instance; + + string result; + using (var sw = new StringWriter()) + { + document.ToHtml(sw, formatter); + result = sw.ToString(); + } + + Assert.Contains("&copy;", result); + Assert.DoesNotContain("©", result); + } + } +} diff --git a/PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs b/PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs new file mode 100644 index 0000000..4e4a51c --- /dev/null +++ b/PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs @@ -0,0 +1,20 @@ +using System; +using Xunit; + +namespace PreMailer.Net.Tests +{ + public class PreserveEntitiesTests + { + [Fact] + public void MoveCssInline_GivenCopyEntity_PreservesEntity() + { + string htmlEncoded = "&copy;"; + string input = $"
{htmlEncoded}
"; + + var premailedOutput = PreMailer.MoveCssInline(input, preserveEntities: true); + + Assert.Contains(htmlEncoded, premailedOutput.Html); + Assert.DoesNotContain("©", premailedOutput.Html); + } + } +} diff --git a/PreMailer.Net/PreMailer.Net/Html/PreserveEntitiesHtmlMarkupFormatter.cs b/PreMailer.Net/PreMailer.Net/Html/PreserveEntitiesHtmlMarkupFormatter.cs new file mode 100644 index 0000000..09aa5eb --- /dev/null +++ b/PreMailer.Net/PreMailer.Net/Html/PreserveEntitiesHtmlMarkupFormatter.cs @@ -0,0 +1,40 @@ +using AngleSharp.Html; +using AngleSharp.Dom; +using System; +using System.Text.RegularExpressions; +using System.Collections.Generic; + +namespace PreMailer.Net.Html +{ + public class PreserveEntitiesHtmlMarkupFormatter : HtmlMarkupFormatter + { + private static readonly Dictionary<string, string> EntityReplacements = new Dictionary<string, string> + { + { "©", "&copy;" }, + { "®", "&reg;" }, + { "™", "&trade;" }, + { "£", "&pound;" }, + { "€", "&euro;" }, + { "¥", "&yen;" }, + { "§", "&sect;" }, + { "±", "&plusmn;" }, + { "¼", "&frac14;" }, + { "½", "&frac12;" }, + { "¾", "&frac34;" } + }; + + public static new readonly PreserveEntitiesHtmlMarkupFormatter Instance = new PreserveEntitiesHtmlMarkupFormatter(); + + public override string Text(ICharacterData text) + { + var result = base.Text(text); + + foreach (var entity in EntityReplacements) + { + result = result.Replace(entity.Key, entity.Value); + } + + return result; + } + } +} diff --git a/PreMailer.Net/PreMailer.Net/PreMailer.cs b/PreMailer.Net/PreMailer.Net/PreMailer.cs index c259aa8..46a5e2c 100644 --- a/PreMailer.Net/PreMailer.Net/PreMailer.cs +++ b/PreMailer.Net/PreMailer.Net/PreMailer.cs @@ -6,6 +6,7 @@ using AngleSharp.Html.Parser; using AngleSharp.Xhtml; using PreMailer.Net.Extensions; +using PreMailer.Net.Html; using PreMailer.Net.Sources; using System; using System.Collections.Generic; @@ -67,85 +68,95 @@ public PreMailer(Stream stream, Uri baseUri = null) _cssSelectorParser = new CssSelectorParser(); } - /// - /// In-lines the CSS within the HTML given. - /// - /// The HTML input. 
- /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. - public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(html).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// The HTML input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. + /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like © will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. 
+ public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(html).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS within the HTML given. - /// - /// The Stream input. - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. - public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(stream).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// The Stream input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. 
+ /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like © will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(stream).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS within the HTML given. - /// - /// /// The base url that will be used to resolve any relative urls - /// The Url that all relative urls will be off of. - /// The HTML input. - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. 
- public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(html, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// /// The base url that will be used to resolve any relative urls + /// The Url that all relative urls will be off of. + /// The HTML input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. + /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like © will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(html, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS within the HTML given. 
- /// - /// /// The base url that will be used to resolve any relative urls - /// The Url that all relative urls will be off of. - /// The HTML input. - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. - public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(stream, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// /// The base url that will be used to resolve any relative urls + /// The Url that all relative urls will be off of. + /// The HTML input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. 
+ /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like © will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(stream, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS for the current HTML - /// - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. 
- public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) + /// + /// In-lines the CSS for the current HTML + /// + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. + /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like © will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) { // Store the variables used for inlining the CSS _removeStyleElements = removeStyleElements; @@ -184,7 +195,14 @@ public InlineResult MoveCssInline(bool removeStyleElements = false, string ignor RemoveCssComments(cssSourceNodes); } - IMarkupFormatter markupFormatter = customFormatter ?? GetMarkupFormatterForDocType(); + IMarkupFormatter markupFormatter = customFormatter; + + if (markupFormatter == null) + { + markupFormatter = preserveEntities ? 
+ PreserveEntitiesHtmlMarkupFormatter.Instance : + GetMarkupFormatterForDocType(); + } using (var sw = new StringWriter()) { From 00a035bdbf6ce3c6c146c16d7add49eb84fed367 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 17:25:59 +0000 Subject: [PATCH 2/2] Move HTML entity preservation test to PreMailerTests class Co-Authored-By: m@martinnormark.com --- .../PreMailer.Net.Tests/PreMailerTests.cs | 12 +++++++++++ .../PreserveEntitiesTests.cs | 20 ------------------- 2 files changed, 12 insertions(+), 20 deletions(-) delete mode 100644 PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs diff --git a/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs b/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs index 9923ece..5635888 100644 --- a/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs +++ b/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs @@ -686,5 +686,17 @@ public void MoveCssInline_GivenCssWithQuotes_ReplacesWithSingleQuotation() Assert.Contains("font-family: 'Roboto', sans-serif", premailedOutput.Html); } + + [Fact] + public void MoveCssInline_GivenCopyEntity_PreservesEntity() + { + string htmlEncoded = "&copy;"; + string input = $"
{htmlEncoded}
"; + + var premailedOutput = PreMailer.MoveCssInline(input, preserveEntities: true); + + Assert.Contains(htmlEncoded, premailedOutput.Html); + Assert.DoesNotContain("©", premailedOutput.Html); + } } } diff --git a/PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs b/PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs deleted file mode 100644 index 4e4a51c..0000000 --- a/PreMailer.Net/PreMailer.Net.Tests/PreserveEntitiesTests.cs +++ /dev/null @@ -1,20 +0,0 @@ -using System; -using Xunit; - -namespace PreMailer.Net.Tests -{ - public class PreserveEntitiesTests - { - [Fact] - public void MoveCssInline_GivenCopyEntity_PreservesEntity() - { - string htmlEncoded = "&copy;"; - string input = $"
{htmlEncoded}
"; - - var premailedOutput = PreMailer.MoveCssInline(input, preserveEntities: true); - - Assert.Contains(htmlEncoded, premailedOutput.Html); - Assert.DoesNotContain("©", premailedOutput.Html); - } - } -}