From da89f3dd14f052d15efdc07c915dfad8f401a9ee Mon Sep 17 00:00:00 2001 From: Tom Wills Date: Mon, 8 Dec 2014 20:44:26 +0000 Subject: [PATCH 1/4] Allow string results from XPath queries --- tools/scrape | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/scrape b/tools/scrape index 60de2595..db22b5f8 100755 --- a/tools/scrape +++ b/tools/scrape @@ -54,7 +54,10 @@ def main(): for e in document.xpath(expression): try: if not args.argument: - text = etree.tostring(e) + if isinstance(e, str): + text = e + else: + text = etree.tostring(e) else: text = e.get(args.argument) if text is not None: From 787b85cda2ac7385015a3308c3499d421a737835 Mon Sep 17 00:00:00 2001 From: Tom Wills Date: Mon, 8 Dec 2014 20:48:51 +0000 Subject: [PATCH 2/4] corrected indents --- tools/scrape | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/scrape b/tools/scrape index db22b5f8..df6f6dfc 100755 --- a/tools/scrape +++ b/tools/scrape @@ -55,9 +55,9 @@ def main(): try: if not args.argument: if isinstance(e, str): - text = e - else: - text = etree.tostring(e) + text = e + else: + text = etree.tostring(e) else: text = e.get(args.argument) if text is not None: From dbca1c695187affa2ee3e9f6714f7ed920dca905 Mon Sep 17 00:00:00 2001 From: Tom Wills Date: Mon, 7 Nov 2016 15:00:13 +0000 Subject: [PATCH 3/4] Add option to suppress trailing newline --- tools/scrape | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/scrape b/tools/scrape index df6f6dfc..9d63cf0a 100755 --- a/tools/scrape +++ b/tools/scrape @@ -28,6 +28,7 @@ def main(): parser.add_argument('-r', '--rawinput', action='store_true', default=False, help="Do not parse HTML before feeding etree (useful" "for escaping CData)") + parser.add_argument('-n', '--nonewline', action='store_true', default=False, help="Do not output trailing newline") args = parser.parse_args() args.expression = args.expression.decode('utf-8') @@ -48,8 +49,13 @@ def main(): else: document = etree.parse(args.html, html_parser) + if args.nonewline: + trailing = "" + else: + trailing = "\n" + if args.body: - sys.stdout.write("\n\n\n") + sys.stdout.write("\n\n" + trailing) for e in document.xpath(expression): try: @@ -61,13 +67,13 @@ def main(): else: text = e.get(args.argument) if text is not None: - sys.stdout.write(text.encode('utf-8') + "\n") + sys.stdout.write(text.encode('utf-8') + trailing) sys.stdout.flush() except IOError: pass if args.body: - sys.stdout.write("\n\n") + sys.stdout.write("\n" + trailing) if __name__ == "__main__": exit(main()) From 23cf5b5e89c804b5338610867646766e8f85a3da Mon Sep 17 00:00:00 2001 From: Tom Wills Date: Mon, 7 Nov 2016 16:14:57 +0000 Subject: [PATCH 4/4] Add option to force expression to be parsed as XPath --- tools/scrape | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/scrape b/tools/scrape index 9d63cf0a..5df93118 100755 --- a/tools/scrape +++ b/tools/scrape @@ -28,12 +28,13 @@ def main(): parser.add_argument('-r', '--rawinput', action='store_true', default=False, help="Do not parse HTML before feeding etree (useful" "for escaping CData)") + parser.add_argument('-x', '--xpath', action='store_true', default=False, help="Force expression to be parsed as XPath") parser.add_argument('-n', '--nonewline', action='store_true', default=False, help="Do not output trailing newline") args = parser.parse_args() args.expression = args.expression.decode('utf-8') - if not args.expression.startswith('//'): + if (not args.expression.startswith('//') and not args.xpath): from cssselect import GenericTranslator, SelectorError try: expression = GenericTranslator().css_to_xpath(args.expression)