Skip to content

Commit 1162877

Browse files
author
Malte Legenhausen
committed
Closed #1
1 parent 78cd13f commit 1162877

File tree

4 files changed

+35
-13
lines changed

4 files changed

+35
-13
lines changed

README.md

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ console.log(text);
4545

4646
You can configure the behaviour of html-to-text with the following options:
4747

48-
* `tables` lets you select certain tables by the `class` attribute from the HTML document. This is necessary because the majority of HTML E-Mails use a table-based layout. So you have to define which tables should be treated as `table`. All other tables are ignored. Default: `[]`
48+
* `tables` lets you select certain tables by the `class` or `id` attribute from the HTML document. This is necessary because the majority of HTML E-Mails use a table-based layout. Prefix your table selectors with a `.` for the `class` and with a `#` for the `id` attribute. All other tables are ignored. You can assign `true` to this option to select all tables. Default: `[]`
4949
* `wordwrap` defines after how many chars a line break should follow in `p` elements. Default: `80`
5050

5151
## Command Line Interface
@@ -61,7 +61,7 @@ cat examples/test.html | html-to-text > test.txt
6161
All of the options described above are also available on the command line. You can use them like this:
6262

6363
```
64-
cat examples/test.html | html-to-text --tables=invoice,address --wordwrap=100 > test.txt
64+
cat examples/test.html | html-to-text --tables=#invoice,.address --wordwrap=100 > test.txt
6565
```
6666

6767
The `tables` option has to be declared as a comma-separated list without whitespace.
@@ -93,7 +93,7 @@ The `tables` option has to be declared as comma separated list without whitespac
9393
</tr>
9494
<tr>
9595
<td>
96-
<table>
96+
<table id="invoice">
9797
<tr>
9898
<th>Article</th>
9999
<th>Price</th>
@@ -108,7 +108,7 @@ The `tables` option has to be declared as comma separated list without whitespac
108108
<span style="font-size:0.8em">Contains: 1x Product 1</span>
109109
</p>
110110
</td>
111-
<td align="right" valign="top">6,99</td>
111+
<td align="right" valign="top">6,99&euro;</td>
112112
<td align="right" valign="top">7%</td>
113113
<td align="right" valign="top">1</td>
114114
<td align="right" valign="top">6,99€</td>
@@ -138,7 +138,7 @@ The `tables` option has to be declared as comma separated list without whitespac
138138
<tr>
139139
<td>
140140
<hr />
141-
<table>
141+
<table class="address">
142142
<tr>
143143
<th align="left">Invoice Address</th>
144144
<th align="left">Shipment Address</th>
@@ -206,24 +206,30 @@ sea takimata sanctus est Lorem ipsum dolor sit amet.
206206
207207
ARTICLE PRICE TAXES AMOUNT TOTAL
208208
Product 1 6,99€ 7% 1 6,99€
209-
Contains: 1x Product 1
209+
Contains: 1x Product 1
210210
Shipment costs 3,25€ 7% 1 3,25€
211-
to pay: 10,24€
212-
Taxes 7%: 0,72€
211+
to pay: 10,24€
212+
Taxes 7%: 0,72€
213+
214+
--------------------------------------------------------------------------------
213215
214216
INVOICE ADDRESS SHIPMENT ADDRESS
215217
Mr. Mr.
216218
John Doe John Doe
217219
Featherstone Street 49 Featherstone Street 49
218220
28199 Bremen 28199 Bremen
219221
222+
--------------------------------------------------------------------------------
223+
220224
LAW OF REVOCATION
221225
At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd no sea
222226
takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet,
223227
diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed
224228
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita
225229
sea takimata sanctus est Lorem ipsum dolor sit amet.
226230
231+
--------------------------------------------------------------------------------
232+
227233
TERMS OF CONDITION
228234
At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd no sea
229235
takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet,

example/html-to-text.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ console.log();
1111

1212
console.log('fromFile:');
1313
htmlToText.fromFile(path.join(__dirname, 'test.html'), {
14-
tables: ['invoice', 'address']
14+
tables: ['#invoice', '.address']
1515
}, function(err, text) {
1616
if (err) return console.error(err);
1717
console.log(text);

example/test.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ <h1>Dear John Doe,</h1>
2222
</tr>
2323
<tr>
2424
<td>
25-
<table class="invoice">
25+
<table id="invoice">
2626
<tr>
2727
<th>Article</th>
2828
<th>Price</th>

lib/html-to-text.js

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ function formatText(elem, options) {
7373
var text = elem.raw;
7474
text = _s.strip(text);
7575
text = helper.decodeHTMLEntities(text);
76-
text = wordwrap(text, options.wordwrap || 80);
76+
text = wordwrap(text, options.wordwrap);
7777
return text;
7878
}
7979

@@ -94,7 +94,7 @@ function formatAnchor(elem, fn) {
9494
}
9595

9696
function formatHorizontalLine(elem, fn, options) {
97-
return '\n' + _s.repeat('-', options.wordwrap || 80) + '\n';
97+
return '\n' + _s.repeat('-', options.wordwrap) + '\n';
9898
}
9999

100100
function tableToString(table) {
@@ -160,6 +160,22 @@ function formatTable(elem, fn) {
160160
return tableToString(table);
161161
}
162162

163+
/**
 * Decides whether a table element is selected by the `tables` option.
 *
 * @param {Object} attr   Attributes of the table element; only `attr.class`
 *                        and `attr.id` are read. May be missing/falsy.
 * @param {Array|boolean} tables  Either exactly `true` (every table is
 *                        selected) or a list of selector strings: entries
 *                        starting with '.' name classes, entries starting
 *                        with '#' name ids.
 * @returns {*} `true` when `tables === true`; otherwise a truthy value when
 *              `attr.class` or `attr.id` is one of the selected names. When
 *              `attr` is falsy, `attr` itself is returned (not a strict
 *              boolean).
 */
function containsTable(attr, tables) {
164+
// `true` is the "select all tables" switch and short-circuits before `attr` is inspected.
if (tables === true) return true;
165+
166+
// Drops the leading selector character ('.' or '#') from an entry.
function removePrefix(key) {
167+
return key.substr(1);
168+
}
// Builds a predicate that keeps only entries beginning with `prefix`.
function checkPrefix(prefix) {
170+
return function(key) {
171+
// NOTE(review): `_s` is presumably underscore.string - confirm against the file's requires.
return _s.startsWith(key, prefix);
172+
};
173+
}
174+
// Names selected via '.'-prefixed entries, with the prefix removed.
var classes = _(tables).chain().filter(checkPrefix('.')).map(removePrefix).value();
175+
// Names selected via '#'-prefixed entries, with the prefix removed.
var ids = _(tables).chain().filter(checkPrefix('#')).map(removePrefix).value();
176+
// The whole `attr.class` string is looked up as a single value here.
// NOTE(review): a multi-valued class attribute (e.g. "a b") would presumably
// only match an entry equal to the full string - confirm whether that is intended.
return attr && (_.include(classes, attr.class) || _.include(ids, attr.id));
177+
}
178+
163179
function buildText(dom, options) {
164180
function walk(dom) {
165181
var result = '';
@@ -186,7 +202,7 @@ function buildText(dom, options) {
186202
result += formatHorizontalLine(elem, walk, options);
187203
break;
188204
case 'table':
189-
if (elem.attribs && elem.attribs.class && _.include(options.tables, elem.attribs.class)) {
205+
if (containsTable(elem.attribs, options.tables)) {
190206
result += formatTable(elem, walk);
191207
break;
192208
}

0 commit comments

Comments
 (0)