From c7a2e1e986ee2dcb643723a88325e978d6cbe6af Mon Sep 17 00:00:00 2001 From: jellenberger Date: Fri, 11 May 2018 14:41:03 -0500 Subject: [PATCH 1/3] Allow recursive custom formatters * Resolves https://github.com/werk85/node-html-to-text/issues/147 * When format functions are called internally within formatter.js it is not possible to override the custom logic. This changes that functionality and allows users to choose whether they want the default formatter to be used internally or their own formatters supplied in the options object. --- lib/formatter.js | 8 ++++---- test/html-to-text.js | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/lib/formatter.js b/lib/formatter.js index ef2e82d..94a18b7 100644 --- a/lib/formatter.js +++ b/lib/formatter.js @@ -98,7 +98,7 @@ function formatAnchor(elem, fn, options) { options.lineCharCount = storedCharCount; - return formatText({ data: result || href, trimLeadingSpace: elem.trimLeadingSpace }, options); + return Object.assign({}, exports, options.format).text({ data: result || href, trimLeadingSpace: elem.trimLeadingSpace }, options); } function formatHorizontalLine(elem, fn, options) { @@ -128,7 +128,7 @@ function formatUnorderedList(elem, fn, options) { return child.type !== 'text' || !whiteSpaceRegex.test(child.data); }); nonWhiteSpaceChildren.forEach(function(elem) { - result += formatListItem(prefix, elem, fn, options); + result += Object.assign({}, exports, options.format).listItem(prefix, elem, fn, options); }); return result + '\n'; } @@ -163,7 +163,7 @@ function formatOrderedList(elem, fn, options) { // Calculate the needed spacing for nice indentation. var spacing = maxLength - index.toString().length; var prefix = ' ' + index + '. ' + ' '.repeat(spacing); - result += formatListItem(prefix, elem, fn, options); + result += Object.assign({}, exports, options.format).listItem(prefix, elem, fn, options); }); } return result + '\n'; @@ -220,7 +220,7 @@ function formatTable(elem, fn, options) { if (elem.type === 'tag') { switch (elem.name.toLowerCase()) { case 'th': - tokens = formatHeading(elem, fn, options).split('\n'); + tokens = Object.assign({}, exports, options.format).heading(elem, fn, options).split('\n'); rows.push(compact(tokens)); break; diff --git a/test/html-to-text.js b/test/html-to-text.js index af7f7aa..62c631e 100644 --- a/test/html-to-text.js +++ b/test/html-to-text.js @@ -377,6 +377,23 @@ describe('html-to-text', function() { }); expect(result).to.equal('====\ntest\n===='); }); + + it('should use custom formatting functions when nested elements are being parsed', function () { + var result = htmlToText.fromString('', { + format: { + listItem: function (prefix, elem, fn, options) { + options = Object.assign({}, options); + if (options.wordwrap) { + options.wordwrap -= prefix.length; + } + var text = fn(elem.children, options); + text = text.replace(/\n/g, '\n' + ' '.repeat(prefix.length)); + return prefix + text.toUpperCase() + '\n'; + } + } + }); + expect(result).to.equal(' * ONE\n * TWO'); + }); }); describe('Base element', function () { From 58df506267484d3a058b071a66a5a3c476b9f2a8 Mon Sep 17 00:00:00 2001 From: jellenberger Date: Fri, 11 May 2018 15:09:34 -0500 Subject: [PATCH 2/3] fixup - reduce line length --- lib/formatter.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/formatter.js b/lib/formatter.js index 94a18b7..f60efdc 100644 --- a/lib/formatter.js +++ b/lib/formatter.js @@ -98,7 +98,10 @@ function formatAnchor(elem, fn, options) { options.lineCharCount = storedCharCount; - return Object.assign({}, exports, options.format).text({ data: result || href, trimLeadingSpace: elem.trimLeadingSpace }, options); + return Object.assign({}, exports, options.format).text({ + data: result || href, + trimLeadingSpace: elem.trimLeadingSpace + }, options); } function formatHorizontalLine(elem, fn, options) { From 734a5fc11d63c11c17f802d115f887bed02a1d5a Mon Sep 17 00:00:00 2001 From: Jack Ellenberger Date: Fri, 3 Aug 2018 09:56:10 -0500 Subject: [PATCH 3/3] Add ability to register colspans in table headers (#1) * fixup of #144 * update integration test to cover new colspan functionality * update unit tests to cover new colspan functionality --- lib/formatter.js | 19 ++++++++++--------- test/html-to-text.js | 25 +++++++++++++++++++++++++ test/test.html | 7 +++++++ test/test.txt | 12 ++++++------ 4 files changed, 48 insertions(+), 15 deletions(-) diff --git a/lib/formatter.js b/lib/formatter.js index ef2e82d..634ea46 100644 --- a/lib/formatter.js +++ b/lib/formatter.js @@ -221,21 +221,22 @@ function formatTable(elem, fn, options) { switch (elem.name.toLowerCase()) { case 'th': tokens = formatHeading(elem, fn, options).split('\n'); - rows.push(compact(tokens)); break; case 'td': tokens = fn(elem.children, options).split('\n'); - rows.push(compact(tokens)); - // Fill colspans with empty values - if (elem.attribs && elem.attribs.colspan) { - count = elem.attribs.colspan - 1 || 0; - times(count, function() { - rows.push(['']); - }); - } break; } + if (tokens) { + rows.push(compact(tokens)); + // Fill colspans with empty values + if (elem.attribs && elem.attribs.colspan) { + count = elem.attribs.colspan - 1 || 0; + times(count, function() { + rows.push(['']); + }); + } + } } }); rows = helper.arrayZip(rows); diff --git a/test/html-to-text.js b/test/html-to-text.js index af7f7aa..9138959 100644 --- a/test/html-to-text.js +++ b/test/html-to-text.js @@ -192,6 +192,31 @@ describe('html-to-text', function() { var result = htmlToText.fromString(html, { tables: true }); expect(result).to.equal(resultExpected); }); + it('does handle colspan on th elements correctly', function () { + var html = '\ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ +
header column 1header columns 2 and 3header column 4
column 1column 2column 3column 4
\ + '; + var resultExpected = ' HEADER COLUMN 1 HEADER COLUMNS 2 AND 3 HEADER COLUMN 4 \n\ +column 1 column 2 column 3 column 4'; + var result = htmlToText.fromString(html, { tables: true }); + expect(result).to.equal(resultExpected); + }); + }); describe('a', function () { diff --git a/test/test.html b/test/test.html index 984735b..7334547 100644 --- a/test/test.html +++ b/test/test.html @@ -30,6 +30,7 @@

Pretty printed table

Article + Notes Price Taxes Amount @@ -44,6 +45,8 @@

Pretty printed table

Contains: 1x Product 1

+ one + two 6,99€ 7% 1 @@ -51,17 +54,21 @@

Pretty printed table

Shipment costs + + two 3,25€ 7% 1 3,25€ +       to pay: 10,24€ + Taxes 7%: 0,72€ diff --git a/test/test.txt b/test/test.txt index 11544d2..1b24425 100644 --- a/test/test.txt +++ b/test/test.txt @@ -17,12 +17,12 @@ takimata sanctus est Lorem ipsum dolor sit amet. -------------------------------------------------------------------------------- PRETTY PRINTED TABLE -ARTICLE PRICE TAXES AMOUNT TOTAL -Product 1 6,99€ 7% 1 6,99€ -Contains: 1x Product 1 -Shipment costs 3,25€ 7% 1 3,25€ - to pay: 10,24€ - Taxes 7%: 0,72€ +ARTICLE NOTES PRICE TAXES AMOUNT TOTAL +Product 1 one two 6,99€ 7% 1 6,99€ +Contains: 1x Product 1 +Shipment costs two 3,25€ 7% 1 3,25€ + to pay: 10,24€ + Taxes 7%: 0,72€ --------------------------------------------------------------------------------