diff --git a/README.md b/README.md index 0047d2f..d7c3a21 100644 --- a/README.md +++ b/README.md @@ -209,7 +209,7 @@ gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea -takimata sanctus est Lorem ipsum dolor sit amet.www.github.com +takimata sanctus est Lorem ipsum dolor sit amet. Github [www.github.com] At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum @@ -256,7 +256,7 @@ MAILTO FORMATING Some Company Some Street 42 Somewhere -E-Mail:test@example.com +E-Mail: Click here [test@example.com] ``` ## License diff --git a/lib/formatter.js b/lib/formatter.js index 657859f..2e5ddc8 100644 --- a/lib/formatter.js +++ b/lib/formatter.js @@ -6,7 +6,7 @@ var helper = require('./helper'); function formatText(elem, options) { var text = _s.strip(elem.raw); text = helper.decodeHTMLEntities(text); - return helper.wordwrap(text, options.wordwrap); + return helper.wordwrap(elem.needsSpace ? ' ' + text : text, options.wordwrap); }; function formatLineBreak(elem, fn, options) { @@ -21,13 +21,23 @@ function formatHeading(elem, fn, options) { return fn(elem.children, options).toUpperCase() + '\n'; } +// If we have both href and anchor text, format it in a useful manner: +// - "anchor text [href]" +// Otherwise if we have only anchor text or an href, we return the part we have: +// - "anchor text" or +// - "href" function formatAnchor(elem, fn, options) { + var href = ''; + // Always get the anchor text + var result = _s.strip(fn(elem.children || [], options)); + // Get the href, if present if (elem.attribs && elem.attribs.href) { - return elem.attribs.href.replace(/^mailto\:/, ''); - } - else { - return helper.wordwrap(helper.decodeHTMLEntities(_s.strip(elem.raw)), options.wordwrap); - } + href = elem.attribs.href.replace(/^mailto\:/, ''); + } + if (result && href) { + result += ' [' + href + ']'; + } + return formatText({ raw: result || href, needsSpace: elem.needsSpace }, options); }; function formatHorizontalLine(elem, fn, options) { diff --git a/lib/helper.js b/lib/helper.js index b90bd41..f3765dc 100644 --- a/lib/helper.js +++ b/lib/helper.js @@ -50,9 +50,10 @@ exports.decodeHTMLEntities = function decodeHTMLEntities(text) { }; exports.wordwrap = function wordwrap(text, max) { - var result = ''; + // Preserve leading space + var result = _s.startsWith(text, ' ') ? ' ' : ''; var words = _s.words(text); - var length = 0; + var length = result.length; var buffer = []; _.each(words, function(word) { if (length + word.length > max) { diff --git a/lib/html-to-text.js b/lib/html-to-text.js index 28a6281..301c894 100644 --- a/lib/html-to-text.js +++ b/lib/html-to-text.js @@ -73,11 +73,15 @@ function containsTable(attr, tables) { function walk(dom, options) { var result = ''; + var whiteSpaceRegex = /\S$/; _.each(dom, function(elem) { switch(elem.type) { case 'tag': switch(elem.name.toLowerCase()) { case 'a': + // Inline element needs a leading space if `result` currently + // doesn't end with whitespace + elem.needsSpace = whiteSpaceRegex.test(result); result += format.anchor(elem, walk, options); break; case 'p': @@ -111,7 +115,12 @@ function walk(dom, options) { } break; case 'text': - if (elem.raw !== '\r\n') result += format.text(elem, options); + if (elem.raw !== '\r\n') { + // Text needs a leading space if `result` currently + // doesn't end with whitespace + elem.needsSpace = whiteSpaceRegex.test(result); + result += format.text(elem, options); + } break; default: if (!_.include(SKIP_TYPES, elem.type)) {