Skip to content

Commit 975e8df

Browse files
author
Malte Legenhausen
committed
Closed #14
1 parent 7b2d882 commit 975e8df

File tree

2 files changed

+23
-8
lines changed

2 files changed

+23
-8
lines changed

example/test.html

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,15 @@
22
<head>
33
<meta charset="utf-8">
44
</head>
5-
65
<body>
6+
<style>
7+
h2 {
8+
font-size:18px;
9+
}
10+
</style>
11+
<script>
12+
alert('Hallo Welt');
13+
</script>
714
<table cellpadding="0" cellspacing="0" border="0">
815
<tr>
916
<td>

lib/html-to-text.js

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ var htmlparser = require("htmlparser");
88
var helper = require('./helper');
99
var format = require('./formatter');
1010

11+
// Element types whose children are skipped when walking the DOM to build the text output
12+
var SKIP_TYPES = [
13+
'style',
14+
'script'
15+
];
16+
1117
function htmlToText(html, options) {
1218
options = options || {};
1319
_.defaults(options, {
@@ -17,9 +23,9 @@ function htmlToText(html, options) {
1723

1824
var handler = new htmlparser.DefaultHandler(function (error, dom) {
1925

20-
}, {
21-
verbose: true,
22-
ignoreWhitespace: true
26+
}, {
27+
verbose: true,
28+
ignoreWhitespace: true
2329
});
2430
new htmlparser.Parser(handler).parseComplete(html);
2531

@@ -40,7 +46,7 @@ function filterBody(dom) {
4046
});
4147
}
4248
walk(dom);
43-
return result || dom;
49+
return result || dom;
4450
}
4551

4652
function containsTable(attr, tables) {
@@ -62,7 +68,7 @@ function containsTable(attr, tables) {
6268
}
6369
var classes = filterByPrefix(tables, '.');
6470
var ids = filterByPrefix(tables, '#');
65-
return attr && (_.include(classes, attr.class) || _.include(ids, attr.id));
71+
return attr && (_.include(classes, attr.class) || _.include(ids, attr.id));
6672
}
6773

6874
function walk(dom, options) {
@@ -101,14 +107,16 @@ function walk(dom, options) {
101107
break;
102108
}
103109
default:
104-
result += walk(elem.children || [], options);
110+
result += walk(elem.children || [], options);
105111
}
106112
break;
107113
case 'text':
108114
if (elem.raw !== '\r\n') result += format.text(elem, options);
109115
break;
110116
default:
111-
result += walk(elem.children || [], options);
117+
if (!_.include(SKIP_TYPES, elem.type)) {
118+
result += walk(elem.children || [], options);
119+
}
112120
}
113121
});
114122
return result;

0 commit comments

Comments
 (0)