@@ -6,6 +6,7 @@ var _s = require('underscore.string');
6
6
var htmlparser = require ( "htmlparser" ) ;
7
7
8
8
var helper = require ( './helper' ) ;
9
+ var format = require ( './formatter' ) ;
9
10
10
11
function htmlToText ( html , options ) {
11
12
options = options || { } ;
@@ -42,151 +43,6 @@ function filterBody(dom) {
42
43
return result || dom ;
43
44
}
44
45
45
/**
 * Transpose a list of arrays: the n-th output array collects the n-th
 * element of every input array.
 *
 * @param {Array[]} array - List of arrays, spread as the arguments of `_.zip`.
 * @returns {Array[]} The transposed list.
 */
function zip(array) {
  var transpose = _.zip;
  return transpose.apply(_, array);
}
48
-
49
/**
 * Wrap `text` so that no line is longer than `max` characters, breaking only
 * between whitespace-separated words. Runs of whitespace are collapsed to a
 * single space and leading/trailing whitespace is removed.
 *
 * A single word longer than `max` is kept intact on a line of its own.
 *
 * @param {string} text - Text to wrap.
 * @param {number} max - Maximum line length. When it is missing, `false` or
 *   not numeric, the text is returned on a single line.
 * @returns {string} The wrapped text, lines separated by '\n'.
 */
function wordwrap(text, max) {
  var trimmed = String(text == null ? '' : text).trim();
  if (trimmed === '') {
    return '';
  }
  var words = trimmed.split(/\s+/);

  // Without a usable limit there is nothing to wrap against.
  var limit = Number(max);
  if (max == null || max === false || isNaN(limit)) {
    return words.join(' ');
  }

  var lines = [];
  var current = [];
  // Length of `current` once joined with single spaces.
  var width = 0;
  words.forEach(function (word) {
    var candidate = current.length === 0 ? word.length : width + 1 + word.length;
    // The first word of a line is always accepted; otherwise an over-long
    // word would flush an empty line in front of itself.
    if (current.length === 0 || candidate <= limit) {
      current.push(word);
      width = candidate;
    } else {
      lines.push(current.join(' '));
      current = [word];
      width = word.length;
    }
  });
  lines.push(current.join(' '));
  return lines.join('\n');
}
68
-
69
/**
 * Render a text node: strip surrounding whitespace from its raw content,
 * decode HTML entities and wrap the result to the configured width.
 *
 * @param {Object} elem - Text node; its `raw` property is read.
 * @param {Object} options - Conversion options; `options.wordwrap` is the wrap width.
 * @returns {string} The wrapped plain text.
 */
function formatText(elem, options) {
  var decoded = helper.decodeHTMLEntities(_s.strip(elem.raw));
  return wordwrap(decoded, options.wordwrap);
}
74
-
75
/**
 * Render a line break: a newline followed by the rendered children.
 *
 * @param {Object} elem - The element; `elem.children` may be absent.
 * @param {Function} fn - Callback `(children, options)` returning the rendered text.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} '\n' followed by the rendered children.
 */
function formatBreak(elem, fn, options) {
  // Elements without content carry no `children`; hand the callback an
  // empty list so it never receives `undefined`.
  return '\n' + fn(elem.children || [], options);
}
78
-
79
/**
 * Render a paragraph: the rendered children followed by a blank line.
 *
 * @param {Object} elem - The element; `elem.children` may be absent.
 * @param {Function} fn - Callback `(children, options)` returning the rendered text.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The rendered children followed by '\n\n'.
 */
function formatParagraph(elem, fn, options) {
  // An empty element has no `children`; fall back to an empty list so the
  // callback never receives `undefined`.
  return fn(elem.children || [], options) + '\n\n';
}
82
-
83
/**
 * Render a heading: the rendered children in upper case followed by a newline.
 *
 * @param {Object} elem - The element; `elem.children` may be absent.
 * @param {Function} fn - Callback `(children, options)` returning the rendered text.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The upper-cased rendered children followed by '\n'.
 */
function formatTitle(elem, fn, options) {
  // An empty element has no `children`; fall back to an empty list so the
  // callback never receives `undefined`.
  var title = fn(elem.children || [], options);
  return title.toUpperCase() + '\n';
}
86
-
87
/**
 * Render an anchor as its link target, with a leading `mailto:` scheme
 * removed. The anchor's own content is not rendered.
 *
 * @param {Object} elem - The anchor element; `elem.attribs.href` is read.
 * @param {Function} fn - Unused; kept for a uniform formatter signature.
 * @param {Object} options - Unused; kept for a uniform formatter signature.
 * @returns {string} The link target, or '' when the anchor has no `href`
 *   (for example a named anchor).
 */
function formatAnchor(elem, fn, options) {
  var href = elem && elem.attribs && elem.attribs.href;
  if (typeof href !== 'string') {
    return '';
  }
  // URI schemes are case-insensitive, so `MAILTO:` is stripped as well.
  return href.replace(/^mailto:/i, '');
}
90
-
91
/**
 * Render a horizontal rule as a row of dashes spanning the wrap width,
 * followed by a blank line.
 *
 * @param {Object} elem - Unused; kept for a uniform formatter signature.
 * @param {Function} fn - Unused; kept for a uniform formatter signature.
 * @param {Object} options - Conversion options; `options.wordwrap` sets the rule length.
 * @returns {string} The dashes followed by '\n\n'.
 */
function formatHorizontalLine(elem, fn, options) {
  var rule = _s.repeat('-', options.wordwrap);
  return rule + '\n\n';
}
94
-
95
/**
 * Render one list entry: the prefix, then the entry's content with every
 * continuation line indented to align under the first one.
 *
 * @param {string} prefix - Bullet or number placed before the first line.
 * @param {Object} elem - The entry element; its `children` are rendered.
 * @param {Function} fn - Callback `(children, options)` returning the rendered text.
 * @param {Object} options - Conversion options; a copy with a reduced
 *   `wordwrap` is handed to `fn`, the original object is left untouched.
 * @returns {string} The formatted entry terminated by '\n'.
 */
function formatListEntry(prefix, elem, fn, options) {
  // Continuation lines are indented by the width of the prefix.
  var indent = _s.repeat(' ', prefix.length);

  // The prefix eats into the available width for the entry's content.
  var narrowed = _.clone(options);
  narrowed.wordwrap -= prefix.length;

  var body = fn(elem.children, narrowed).split('\n').join('\n' + indent);
  return prefix + body + '\n';
}
106
-
107
/**
 * Render a `ul` or `ol` element as a text list followed by a blank line.
 * Unordered entries are prefixed with ' * ', ordered entries with their
 * 1-based number, padded so that the content of all entries lines up.
 * Any other element name yields just the trailing newline.
 *
 * @param {Object} elem - The list element; `name` and `children` are read.
 * @param {Function} fn - Callback `(children, options)` returning the rendered text.
 * @param {Object} options - Conversion options, passed on to each entry.
 * @returns {string} The formatted list terminated by '\n'.
 */
function formatList(elem, fn, options) {
  var entries = [];
  switch (elem.name) {
    case 'ul':
      entries = _.map(elem.children, function (child) {
        return formatListEntry(' * ', child, fn, options);
      });
      break;
    case 'ol':
      // Width of the largest number, used to pad the shorter ones.
      var digits = String(elem.children.length).length;
      entries = _.map(elem.children, function (child, position) {
        var label = String(position + 1);
        var padding = _s.repeat(' ', digits - label.length);
        return formatListEntry(' ' + label + '. ' + padding, child, fn, options);
      });
      break;
  }
  return entries.join('') + '\n';
}
126
-
127
/**
 * Lay out a table of strings as aligned text columns.
 *
 * Each column is as wide as its longest (unstripped) cell. Every cell is
 * stripped, right-padded to that width and followed by one space; every row
 * ends with '\n' and the table is followed by one more '\n'.
 *
 * @param {string[][]} table - Rows of cell strings.
 * @returns {string} The formatted table.
 */
function tableToString(table) {
  // Length of every cell, row by row.
  var cellLengths = _.map(table, function (row) {
    return _.map(row, function (cell) {
      return cell.length;
    });
  });

  // Transpose to columns and keep the widest cell of each.
  var columnWidths = _.map(zip(cellLengths), function (lengths) {
    return _.max(lengths);
  });

  var lines = _.map(table, function (row) {
    var padded = _.map(row, function (cell, column) {
      return _s.rpad(_s.strip(cell), columnWidths[column], ' ') + ' ';
    });
    return padded.join('') + '\n';
  });
  return lines.join('') + '\n';
}
153
-
154
/**
 * Render a table element as aligned text columns.
 *
 * Only `tr` tags directly below `elem` are considered, and within them only
 * `th` and `td` tags. Header cells are rendered through `formatTitle`, data
 * cells through `fn`. A cell whose rendered text spans several lines produces
 * several text rows; cells with fewer lines are filled with ''.
 *
 * @param {Object} elem - The table element; its `children` are scanned.
 * @param {Function} fn - Callback `(children, options)` returning the rendered text.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The table as produced by `tableToString`.
 */
function formatTable(elem, fn, options) {
  var table = [];
  _.each(elem.children, function (row) {
    if (row.type !== 'tag' || row.name !== 'tr') {
      return;
    }

    // One entry per column, each holding the non-empty lines of that cell.
    var cells = [];
    _.each(row.children, function (cell) {
      if (cell.type !== 'tag') {
        return;
      }
      var text;
      if (cell.name === 'th') {
        text = formatTitle(cell, fn, options);
      } else if (cell.name === 'td') {
        text = fn(cell.children, options);
      } else {
        return;
      }
      cells.push(_.compact(text.split('\n')));

      // Fill the extra columns of a spanning cell with empty values. This
      // applies to header cells too, so headers stay aligned with the data
      // below. The attribute is a string; a missing, non-numeric or
      // non-positive span adds nothing.
      var span = cell.attribs ? parseInt(cell.attribs.colspan, 10) : NaN;
      _.times(span > 1 ? span - 1 : 0, function () {
        cells.push(['']);
      });
    });

    // Turn per-column line lists into text rows, padding short columns.
    _.each(zip(cells), function (line) {
      table.push(_.map(line, function (col) {
        return col || '';
      }));
    });
  });
  return tableToString(table);
}
189
-
190
46
function containsTable ( attr , tables ) {
191
47
if ( tables === true ) return true ;
192
48
@@ -216,38 +72,40 @@ function walk(dom, options) {
216
72
case 'tag' :
217
73
switch ( elem . name ) {
218
74
case 'a' :
219
- result += formatAnchor ( elem , walk , options ) ;
75
+ result += format . anchor ( elem , walk , options ) ;
220
76
break ;
221
77
case 'p' :
222
- result += formatParagraph ( elem , walk , options ) ;
78
+ result += format . paragraph ( elem , walk , options ) ;
223
79
break ;
224
80
case 'h1' :
225
81
case 'h2' :
226
82
case 'h3' :
227
83
case 'h4' :
228
- result += formatTitle ( elem , walk , options ) ;
84
+ result += format . heading ( elem , walk , options ) ;
229
85
break ;
230
86
case 'br' :
231
- result += formatBreak ( elem , walk , options ) ;
87
+ result += format . lineBreak ( elem , walk , options ) ;
232
88
break ;
233
89
case 'hr' :
234
- result += formatHorizontalLine ( elem , walk , options ) ;
90
+ result += format . horizontalLine ( elem , walk , options ) ;
235
91
break ;
236
92
case 'ul' :
93
+ result += format . unorderedList ( elem , walk , options ) ;
94
+ break ;
237
95
case 'ol' :
238
- result += formatList ( elem , walk , options ) ;
96
+ result += format . orderedList ( elem , walk , options ) ;
239
97
break ;
240
98
case 'table' :
241
99
if ( containsTable ( elem . attribs , options . tables ) ) {
242
- result += formatTable ( elem , walk , options ) ;
100
+ result += format . table ( elem , walk , options ) ;
243
101
break ;
244
102
}
245
103
default :
246
104
result += walk ( elem . children || [ ] , options ) ;
247
105
}
248
106
break ;
249
107
case 'text' :
250
- if ( elem . raw !== '\r\n' ) result += formatText ( elem , options ) ;
108
+ if ( elem . raw !== '\r\n' ) result += format . text ( elem , options ) ;
251
109
break ;
252
110
default :
253
111
result += walk ( elem . children || [ ] , options ) ;
0 commit comments