diff --git a/PreMailer.Net/PreMailer.Net.Tests/Html/PreserveEntitiesHtmlMarkupFormatterTests.cs b/PreMailer.Net/PreMailer.Net.Tests/Html/PreserveEntitiesHtmlMarkupFormatterTests.cs new file mode 100644 index 0000000..068b06a --- /dev/null +++ b/PreMailer.Net/PreMailer.Net.Tests/Html/PreserveEntitiesHtmlMarkupFormatterTests.cs @@ -0,0 +1,30 @@ +using AngleSharp.Dom; +using AngleSharp.Html.Parser; +using PreMailer.Net.Html; +using System; +using System.IO; +using Xunit; + +namespace PreMailer.Net.Tests.Html +{ + public class PreserveEntitiesHtmlMarkupFormatterTests + { + [Fact] + public void Text_GivenHtmlWithCopyEntity_PreservesEntity() + { + string html = "

&copy; 2025

"; + var document = new HtmlParser().ParseDocument(html); + var formatter = PreserveEntitiesHtmlMarkupFormatter.Instance; + + string result; + using (var sw = new StringWriter()) + { + document.ToHtml(sw, formatter); + result = sw.ToString(); + } + + Assert.Contains("&copy;", result); + Assert.DoesNotContain("©", result); + } + } +} diff --git a/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs b/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs index 9923ece..5635888 100644 --- a/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs +++ b/PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs @@ -686,5 +686,17 @@ public void MoveCssInline_GivenCssWithQuotes_ReplacesWithSingleQuotation() Assert.Contains("font-family: 'Roboto', sans-serif", premailedOutput.Html); } + + [Fact] + public void MoveCssInline_GivenCopyEntity_PreservesEntity() + { + string htmlEncoded = "&copy;"; + string input = $"
{htmlEncoded}
"; + + var premailedOutput = PreMailer.MoveCssInline(input, preserveEntities: true); + + Assert.Contains(htmlEncoded, premailedOutput.Html); + Assert.DoesNotContain("©", premailedOutput.Html); + } } } diff --git a/PreMailer.Net/PreMailer.Net/Html/PreserveEntitiesHtmlMarkupFormatter.cs b/PreMailer.Net/PreMailer.Net/Html/PreserveEntitiesHtmlMarkupFormatter.cs new file mode 100644 index 0000000..09aa5eb --- /dev/null +++ b/PreMailer.Net/PreMailer.Net/Html/PreserveEntitiesHtmlMarkupFormatter.cs @@ -0,0 +1,40 @@ +using AngleSharp.Html; +using AngleSharp.Dom; +using System; +using System.Text.RegularExpressions; +using System.Collections.Generic; + +namespace PreMailer.Net.Html +{ + public class PreserveEntitiesHtmlMarkupFormatter : HtmlMarkupFormatter + { + private static readonly Dictionary<string, string> EntityReplacements = new Dictionary<string, string> + { + { "©", "&copy;" }, + { "®", "&reg;" }, + { "™", "&trade;" }, + { "£", "&pound;" }, + { "€", "&euro;" }, + { "¥", "&yen;" }, + { "§", "&sect;" }, + { "±", "&plusmn;" }, + { "¼", "&frac14;" }, + { "½", "&frac12;" }, + { "¾", "&frac34;" } + }; + + public static new readonly PreserveEntitiesHtmlMarkupFormatter Instance = new PreserveEntitiesHtmlMarkupFormatter(); + + public override string Text(ICharacterData text) + { + var result = base.Text(text); + + foreach (var entity in EntityReplacements) + { + result = result.Replace(entity.Key, entity.Value); + } + + return result; + } + } +} diff --git a/PreMailer.Net/PreMailer.Net/PreMailer.cs b/PreMailer.Net/PreMailer.Net/PreMailer.cs index c259aa8..46a5e2c 100644 --- a/PreMailer.Net/PreMailer.Net/PreMailer.cs +++ b/PreMailer.Net/PreMailer.Net/PreMailer.cs @@ -6,6 +6,7 @@ using AngleSharp.Html.Parser; using AngleSharp.Xhtml; using PreMailer.Net.Extensions; +using PreMailer.Net.Html; using PreMailer.Net.Sources; using System; using System.Collections.Generic; @@ -67,85 +68,95 @@ public PreMailer(Stream stream, Uri baseUri = null) _cssSelectorParser = new CssSelectorParser(); } - /// - /// In-lines the CSS within the HTML given. - /// - /// The HTML input.
- /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. - public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(html).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// The HTML input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. + /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like &copy; will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes.
+ public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(html).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS within the HTML given. - /// - /// The Stream input. - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. - public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(stream).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// The Stream input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. 
+ /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like &copy; will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(stream).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS within the HTML given. - /// - /// /// The base url that will be used to resolve any relative urls - /// The Url that all relative urls will be off of. - /// The HTML input. - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes.
- public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(html, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// /// The base url that will be used to resolve any relative urls + /// The Url that all relative urls will be off of. + /// The HTML input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. + /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like &copy; will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(html, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS within the HTML given.
- /// - /// /// The base url that will be used to resolve any relative urls - /// The Url that all relative urls will be off of. - /// The HTML input. - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes. - public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) - { - return new PreMailer(stream, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries); - } + /// + /// In-lines the CSS within the HTML given. + /// + /// /// The base url that will be used to resolve any relative urls + /// The Url that all relative urls will be off of. + /// The HTML input. + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. 
+ /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like &copy; will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) + { + return new PreMailer(stream, baseUri).MoveCssInline(removeStyleElements, ignoreElements, css, stripIdAndClassAttributes, removeComments, customFormatter, preserveMediaQueries, preserveEntities); + } - /// - /// In-lines the CSS for the current HTML - /// - /// If set to true the style elements are removed. - /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) - /// A string containing a style-sheet for inlining. - /// True to strip ID and class attributes - /// True to remove comments, false to leave them intact - /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node - /// Returns the html input, with styles moved to inline attributes.
- public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false) + /// + /// In-lines the CSS for the current HTML + /// + /// If set to true the style elements are removed. + /// CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.) + /// A string containing a style-sheet for inlining. + /// True to strip ID and class attributes + /// True to remove comments, false to leave them intact + /// Custom formatter to use + /// If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node + /// If set to true, HTML entities like &copy; will be preserved instead of being converted to characters + /// Returns the html input, with styles moved to inline attributes. + public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false) { // Store the variables used for inlining the CSS _removeStyleElements = removeStyleElements; @@ -184,7 +195,14 @@ public InlineResult MoveCssInline(bool removeStyleElements = false, string ignor RemoveCssComments(cssSourceNodes); } - IMarkupFormatter markupFormatter = customFormatter ?? GetMarkupFormatterForDocType(); + IMarkupFormatter markupFormatter = customFormatter; + + if (markupFormatter == null) + { + markupFormatter = preserveEntities ? + PreserveEntitiesHtmlMarkupFormatter.Instance : + GetMarkupFormatterForDocType(); + } using (var sw = new StringWriter()) {