Skip to content

Fix #347: Preserve HTML entities like © during processing #425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using PreMailer.Net.Html;
using System;
using System.IO;
using Xunit;

namespace PreMailer.Net.Tests.Html
{
/// <summary>
/// Tests for <see cref="PreserveEntitiesHtmlMarkupFormatter"/>.
/// </summary>
public class PreserveEntitiesHtmlMarkupFormatterTests
{
    /// <summary>
    /// Markup containing <c>&amp;copy;</c> must be serialized with the named entity
    /// and without the literal copyright character.
    /// </summary>
    [Fact]
    public void Text_GivenHtmlWithCopyEntity_PreservesEntity()
    {
        // Arrange
        var parsed = new HtmlParser().ParseDocument("<html><body><p>&copy; 2025</p></body></html>");
        var entityFormatter = PreserveEntitiesHtmlMarkupFormatter.Instance;

        // Act
        var serialized = Serialize(parsed, entityFormatter);

        // Assert
        Assert.Contains("&copy;", serialized);
        Assert.DoesNotContain("©", serialized);
    }

    /// <summary>
    /// Writes <paramref name="document"/> through <paramref name="formatter"/> into an
    /// in-memory writer and returns the produced markup.
    /// </summary>
    private static string Serialize(IDocument document, PreserveEntitiesHtmlMarkupFormatter formatter)
    {
        using (var writer = new StringWriter())
        {
            document.ToHtml(writer, formatter);
            return writer.ToString();
        }
    }
}
}
12 changes: 12 additions & 0 deletions PreMailer.Net/PreMailer.Net.Tests/PreMailerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -686,5 +686,17 @@ public void MoveCssInline_GivenCssWithQuotes_ReplacesWithSingleQuotation()

Assert.Contains("font-family: 'Roboto', sans-serif", premailedOutput.Html);
}

/// <summary>
/// With <c>preserveEntities</c> enabled, a <c>&amp;copy;</c> entity in the input must
/// appear in the in-lined output as the entity, not as the literal character.
/// </summary>
[Fact]
public void MoveCssInline_GivenCopyEntity_PreservesEntity()
{
    // Arrange
    string htmlEncoded = "&copy;";
    string markup = $"<html><head></head><body><div>{htmlEncoded}</div></body></html>";

    // Act
    var inlined = PreMailer.MoveCssInline(markup, preserveEntities: true);
    string outputHtml = inlined.Html;

    // Assert
    Assert.Contains(htmlEncoded, outputHtml);
    Assert.DoesNotContain("©", outputHtml);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
using AngleSharp.Html;
using AngleSharp.Dom;
using System;
using System.Text.RegularExpressions;
using System.Collections.Generic;

namespace PreMailer.Net.Html
{
/// <summary>
/// An <see cref="HtmlMarkupFormatter"/> that post-processes the text produced by the
/// base formatter and writes a fixed set of symbol characters as named HTML entities
/// (for example <c>©</c> becomes <c>&amp;copy;</c>).
/// </summary>
/// <remarks>
/// Only <see cref="Text(ICharacterData)"/> is overridden; every other member is inherited
/// from the base formatter. The substitution is applied to every occurrence of a mapped
/// character in the formatted text, so it does not depend on how the character was
/// written in the original input.
/// </remarks>
public class PreserveEntitiesHtmlMarkupFormatter : HtmlMarkupFormatter
{
    /// <summary>Maps each supported character to the named entity emitted in its place.</summary>
    private static readonly Dictionary<char, string> EntityReplacements = new Dictionary<char, string>
    {
        { '©', "&copy;" },
        { '®', "&reg;" },
        { '™', "&trade;" },
        { '£', "&pound;" },
        { '€', "&euro;" },
        { '¥', "&yen;" },
        { '§', "&sect;" },
        { '±', "&plusmn;" },
        { '¼', "&frac14;" },
        { '½', "&frac12;" },
        { '¾', "&frac34;" }
    };

    /// <summary>
    /// The mapped characters as an array, used to detect quickly whether a text needs any
    /// substitution at all. Declared after <see cref="EntityReplacements"/> because static
    /// field initializers run in textual order.
    /// </summary>
    private static readonly char[] ReplaceableCharacters = new List<char>(EntityReplacements.Keys).ToArray();

    /// <summary>
    /// Shared instance of the formatter. Declared with <c>new</c> because it hides the
    /// <c>Instance</c> member inherited from <see cref="HtmlMarkupFormatter"/>.
    /// </summary>
    public static new readonly PreserveEntitiesHtmlMarkupFormatter Instance = new PreserveEntitiesHtmlMarkupFormatter();

    /// <summary>
    /// Formats a character-data node with the base formatter and then replaces every
    /// mapped character with its named entity.
    /// </summary>
    /// <param name="text">The character data to format.</param>
    /// <returns>
    /// The base formatter's output with mapped characters replaced by entities. When the
    /// output contains none of the mapped characters it is returned unchanged.
    /// </returns>
    public override string Text(ICharacterData text)
    {
        var formatted = base.Text(text);

        if (string.IsNullOrEmpty(formatted))
        {
            return formatted;
        }

        // Fast path: most text nodes contain no mapped character, so avoid any allocation.
        var firstHit = formatted.IndexOfAny(ReplaceableCharacters);

        if (firstHit < 0)
        {
            return formatted;
        }

        // Single pass instead of one full-string Replace per dictionary entry. The result
        // is the same because every key is a single character and no replacement text
        // contains a key.
        var builder = new System.Text.StringBuilder(formatted.Length + 16);
        builder.Append(formatted, 0, firstHit);

        for (var index = firstHit; index < formatted.Length; index++)
        {
            var current = formatted[index];

            if (EntityReplacements.TryGetValue(current, out var entity))
            {
                builder.Append(entity);
            }
            else
            {
                builder.Append(current);
            }
        }

        return builder.ToString();
    }
}
}
170 changes: 94 additions & 76 deletions PreMailer.Net/PreMailer.Net/PreMailer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using AngleSharp.Html.Parser;
using AngleSharp.Xhtml;
using PreMailer.Net.Extensions;
using PreMailer.Net.Html;
using PreMailer.Net.Sources;
using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -67,85 +68,95 @@ public PreMailer(Stream stream, Uri baseUri = null)
_cssSelectorParser = new CssSelectorParser();
}

/// <summary>
/// Creates a <see cref="PreMailer"/> for the given HTML string and in-lines its CSS.
/// </summary>
/// <param name="html">The HTML input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use; passed through unchanged to the instance method.</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
{
    var preMailer = new PreMailer(html);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries);
}
/// <summary>
/// Creates a <see cref="PreMailer"/> for the given HTML string and in-lines its CSS.
/// </summary>
/// <param name="html">The HTML input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <param name="preserveEntities">If set to true, HTML entities like <c>&amp;copy;</c> will be preserved instead of being converted to characters</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
{
    var preMailer = new PreMailer(html);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries,
        preserveEntities: preserveEntities);
}

/// <summary>
/// Creates a <see cref="PreMailer"/> for the given stream and in-lines the CSS of its HTML.
/// </summary>
/// <param name="stream">The Stream input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use; passed through unchanged to the instance method.</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
{
    var preMailer = new PreMailer(stream);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries);
}
/// <summary>
/// Creates a <see cref="PreMailer"/> for the given stream and in-lines the CSS of its HTML.
/// </summary>
/// <param name="stream">The Stream input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <param name="preserveEntities">If set to true, HTML entities like <c>&amp;copy;</c> will be preserved instead of being converted to characters</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
{
    var preMailer = new PreMailer(stream);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries,
        preserveEntities: preserveEntities);
}

/// <summary>
/// Creates a <see cref="PreMailer"/> for the given HTML string and base URI, then in-lines its CSS.
/// </summary>
/// <param name="baseUri">The base url that will be used to resolve any relative urls</param>
/// <param name="html">The HTML input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use; passed through unchanged to the instance method.</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
{
    var preMailer = new PreMailer(html, baseUri);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries);
}
/// <summary>
/// Creates a <see cref="PreMailer"/> for the given HTML string and base URI, then in-lines its CSS.
/// </summary>
/// <param name="baseUri">The base url that will be used to resolve any relative urls</param>
/// <param name="html">The HTML input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <param name="preserveEntities">If set to true, HTML entities like <c>&amp;copy;</c> will be preserved instead of being converted to characters</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(Uri baseUri, string html, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
{
    var preMailer = new PreMailer(html, baseUri);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries,
        preserveEntities: preserveEntities);
}

/// <summary>
/// Creates a <see cref="PreMailer"/> for the given stream and base URI, then in-lines the CSS of its HTML.
/// </summary>
/// <param name="baseUri">The base url that will be used to resolve any relative urls</param>
/// <param name="stream">The HTML input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use; passed through unchanged to the instance method.</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
{
    var preMailer = new PreMailer(stream, baseUri);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries);
}
/// <summary>
/// Creates a <see cref="PreMailer"/> for the given stream and base URI, then in-lines the CSS of its HTML.
/// </summary>
/// <param name="baseUri">The base url that will be used to resolve any relative urls</param>
/// <param name="stream">The HTML input.</param>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <param name="preserveEntities">If set to true, HTML entities like <c>&amp;copy;</c> will be preserved instead of being converted to characters</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public static InlineResult MoveCssInline(Uri baseUri, Stream stream, bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
{
    var preMailer = new PreMailer(stream, baseUri);

    // Forward every option by name to the instance overload.
    return preMailer.MoveCssInline(
        removeStyleElements: removeStyleElements,
        ignoreElements: ignoreElements,
        css: css,
        stripIdAndClassAttributes: stripIdAndClassAttributes,
        removeComments: removeComments,
        customFormatter: customFormatter,
        preserveMediaQueries: preserveMediaQueries,
        preserveEntities: preserveEntities);
}

/// <summary>
/// In-lines the CSS for the current HTML
/// </summary>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false)
/// <summary>
/// In-lines the CSS for the current HTML
/// </summary>
/// <param name="removeStyleElements">If set to <c>true</c> the style elements are removed.</param>
/// <param name="ignoreElements">CSS selector for STYLE elements to ignore (e.g. mobile-specific styles etc.)</param>
/// <param name="css">A string containing a style-sheet for inlining.</param>
/// <param name="stripIdAndClassAttributes">True to strip ID and class attributes</param>
/// <param name="removeComments">True to remove comments, false to leave them intact</param>
/// <param name="customFormatter">Custom formatter to use</param>
/// <param name="preserveMediaQueries">If set to true and removeStyleElements is true, it will instead preserve unsupported media queries in the style node and remove the other css, instead of removing the whole style node</param>
/// <param name="preserveEntities">If set to true, HTML entities like &copy; will be preserved instead of being converted to characters</param>
/// <returns>Returns the html input, with styles moved to inline attributes.</returns>
public InlineResult MoveCssInline(bool removeStyleElements = false, string ignoreElements = null, string css = null, bool stripIdAndClassAttributes = false, bool removeComments = false, IMarkupFormatter customFormatter = null, bool preserveMediaQueries = false, bool preserveEntities = false)
{
// Store the variables used for inlining the CSS
_removeStyleElements = removeStyleElements;
Expand Down Expand Up @@ -184,7 +195,14 @@ public InlineResult MoveCssInline(bool removeStyleElements = false, string ignor
RemoveCssComments(cssSourceNodes);
}

IMarkupFormatter markupFormatter = customFormatter ?? GetMarkupFormatterForDocType();
IMarkupFormatter markupFormatter = customFormatter;

if (markupFormatter == null)
{
markupFormatter = preserveEntities ?
PreserveEntitiesHtmlMarkupFormatter.Instance :
GetMarkupFormatterForDocType();
}

using (var sw = new StringWriter())
{
Expand Down