@@ -8,6 +8,12 @@ var htmlparser = require("htmlparser");
8
8
var helper = require ( './helper' ) ;
9
9
var format = require ( './formatter' ) ;
10
10
11
+ // Element types whose children are skipped when walking the DOM
12
+ var SKIP_TYPES = [
13
+ 'style' ,
14
+ 'script'
15
+ ] ;
16
+
11
17
function htmlToText ( html , options ) {
12
18
options = options || { } ;
13
19
_ . defaults ( options , {
@@ -17,9 +23,9 @@ function htmlToText(html, options) {
17
23
18
24
var handler = new htmlparser . DefaultHandler ( function ( error , dom ) {
19
25
20
- } , {
21
- verbose : true ,
22
- ignoreWhitespace : true
26
+ } , {
27
+ verbose : true ,
28
+ ignoreWhitespace : true
23
29
} ) ;
24
30
new htmlparser . Parser ( handler ) . parseComplete ( html ) ;
25
31
@@ -40,7 +46,7 @@ function filterBody(dom) {
40
46
} ) ;
41
47
}
42
48
walk ( dom ) ;
43
- return result || dom ;
49
+ return result || dom ;
44
50
}
45
51
46
52
function containsTable ( attr , tables ) {
@@ -62,7 +68,7 @@ function containsTable(attr, tables) {
62
68
}
63
69
var classes = filterByPrefix ( tables , '.' ) ;
64
70
var ids = filterByPrefix ( tables , '#' ) ;
65
- return attr && ( _ . include ( classes , attr . class ) || _ . include ( ids , attr . id ) ) ;
71
+ return attr && ( _ . include ( classes , attr . class ) || _ . include ( ids , attr . id ) ) ;
66
72
}
67
73
68
74
function walk ( dom , options ) {
@@ -101,14 +107,16 @@ function walk(dom, options) {
101
107
break ;
102
108
}
103
109
default :
104
- result += walk ( elem . children || [ ] , options ) ;
110
+ result += walk ( elem . children || [ ] , options ) ;
105
111
}
106
112
break ;
107
113
case 'text' :
108
114
if ( elem . raw !== '\r\n' ) result += format . text ( elem , options ) ;
109
115
break ;
110
116
default :
111
- result += walk ( elem . children || [ ] , options ) ;
117
+ if ( ! _ . include ( SKIP_TYPES , elem . type ) ) {
118
+ result += walk ( elem . children || [ ] , options ) ;
119
+ }
112
120
}
113
121
} ) ;
114
122
return result ;
0 commit comments