Skip to content

Commit 74351bb

Browse files
authored
Merge pull request #64 from mischov/fix/html-encoding-invalid-characters
Escape problem chars when encoding attribute values and text to html
2 parents dadfabd + 4f72ff4 commit 74351bb

File tree

4 files changed

+40
-8
lines changed

4 files changed

+40
-8
lines changed

lib/meeseeks/document/element.ex

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,7 @@ defmodule Meeseeks.Document.Element do
129129
end
130130

131131
# Appends one attribute to the accumulated string `acc` as ` name=<quoted value>`.
# This hunk shows both versions of the body:
#   * removed lines (gutter `NNN-`): used single-quote delimiters when the value
#     contained a double quote, double-quote delimiters otherwise, and wrote the
#     value unescaped in both branches;
#   * added line (gutter `NNN+`): always uses double-quote delimiters and passes
#     the value through Helpers.html_escape_attribute_value/1 first.
# NOTE(review): the `Helpers` alias is not visible in this hunk - presumably
# declared elsewhere in the module; confirm.
defp join_attribute({attribute, value}, acc) do
132-
if String.contains?(value, "\"") do
133-
"#{acc} #{attribute}='#{value}'"
134-
else
135-
"#{acc} #{attribute}=\"#{value}\""
136-
end
132+
"#{acc} #{attribute}=\"#{Helpers.html_escape_attribute_value(value)}\""
137133
end
138134

139135
defp join_data(node, acc, document) do

lib/meeseeks/document/helpers.ex

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,43 @@
11
# String helpers for documents: whitespace collapsing and HTML escaping of
# attribute values and text content.
defmodule Meeseeks.Document.Helpers do
22
@moduledoc false
33

4+
# collapse_whitespace
5+
46
# Replaces every run matched by the regex `[\s]+` in `string` with a single
# space and returns the resulting string.
def collapse_whitespace(string) do
57
String.replace(string, ~r/[\s]+/, " ")
68
end
9+
10+
# html_escape
11+
12+
# Escapes an attribute value: `&` becomes `&amp;` and `"` becomes `&quot;`.
# Single quotes, `<` and `>` are left untouched.
def html_escape_attribute_value(attribute_value) do
13+
html_escape_chars(attribute_value, ["&", "\""])
14+
end
15+
16+
# Escapes text content: `&` becomes `&amp;`, `<` becomes `&lt;` and `>`
# becomes `&gt;`. Quote characters are left untouched.
def html_escape_text(text) do
17+
html_escape_chars(text, ["&", "<", ">"])
18+
end
19+
20+
# Shared escaping routine. `escaped_chars` is the list of binaries to look for
# in `subject`; each occurrence is replaced by html_escape_char/1 and the
# result is returned as a single binary.
defp html_escape_chars(subject, escaped_chars) do
21+
# List of {start, length} byte positions for every occurrence found.
matches = :binary.matches(subject, escaped_chars)
22+
23+
subject
24+
|> do_replace(matches, &html_escape_char/1, 0)
25+
# do_replace/4 returns a list of binaries (iodata); flatten it once here.
|> IO.iodata_to_binary()
26+
end
27+
28+
# No matches left: emit whatever remains of `subject` from byte offset `n`.
defp do_replace(subject, [], _, n) do
29+
[binary_part(subject, n, byte_size(subject) - n)]
30+
end
31+
32+
# Emits the unmatched bytes between offset `n` and the next match, then the
# replacement for the matched bytes, and recurses with `n` moved just past the
# match. `start - n` must be non-negative, so this relies on `matches` being
# in ascending, non-overlapping order.
defp do_replace(subject, [{start, length} | matches], replacement, n) do
33+
prefix = binary_part(subject, n, start - n)
34+
middle = replacement.(binary_part(subject, start, length))
35+
[prefix, middle | do_replace(subject, matches, replacement, start + length)]
36+
end
37+
38+
# Maps a single special character to its HTML entity.
# NOTE(review): neither public function above passes "'" in its
# `escaped_chars` list, so the "'" clause is not reached by the callers
# visible here.
defp html_escape_char("<"), do: "&lt;"
39+
defp html_escape_char(">"), do: "&gt;"
40+
defp html_escape_char("&"), do: "&amp;"
41+
defp html_escape_char("\""), do: "&quot;"
42+
defp html_escape_char("'"), do: "&#39;"
743
end

lib/meeseeks/document/text.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ defmodule Meeseeks.Document.Text do
99

1010
@impl true
1111
# Returns the HTML for a text node; the second argument is unused.
# Removed line (gutter `12-`): returned `node.content` unchanged.
# Added line (gutter `12+`): returns `node.content` passed through
# Helpers.html_escape_text/1.
# NOTE(review): the `Helpers` alias is not visible in this hunk - presumably
# declared elsewhere in the module; confirm.
def html(node, _document) do
12-
node.content
12+
Helpers.html_escape_text(node.content)
1313
end
1414

1515
@impl true

test/meeseeks/document_test.exs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ defmodule Meeseeks.DocumentTest do
1010
{"head", [], []},
1111
{"body", [],
1212
[
13-
{"div", [{"attr", "1 \"2\" 3"}],
13+
{"div", [{"attr", "1 \"2\" 3 '"}],
1414
[
1515
{"p", [], []},
1616
{"p", [], []},
@@ -24,7 +24,7 @@ defmodule Meeseeks.DocumentTest do
2424

2525
# Asserts that Document.html(@document) equals the expected HTML string.
# Removed expectation (gutter `27-`): attribute delimited by single quotes with
# the inner double quotes written literally.
# Added expectation (gutter `27+`): attribute delimited by double quotes, inner
# double quotes written as `&quot;`, and the trailing `'` written literally.
test "html" do
2626
expected =
27-
"<!DOCTYPE html><html><head></head><body><div attr='1 \"2\" 3'><p></p><p></p><div><p></p><p></p></div><p></p></div></body></html>"
27+
"<!DOCTYPE html><html><head></head><body><div attr=\"1 &quot;2&quot; 3 '\"><p></p><p></p><div><p></p><p></p></div><p></p></div></body></html>"
2828

2929
assert Document.html(@document) == expected
3030
end

0 commit comments

Comments
 (0)