Skip to content

Commit c69ac07

Browse files
committed
20070803
- Set LANG=C when generating the manpage
- Detect html attachments even when mimetype is application/octet-stream
- Fix: stop optimizing blanks, might be signed (spotted by jmc at dolorespark.org)
1 parent 22b14bd commit c69ac07

File tree

2 files changed

+91
-83
lines changed

2 files changed

+91
-83
lines changed

CHANGELOG

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,9 @@
1+
20070803
2+
3+
- Set LANG=C when generating the manpage
4+
- Detect html attachments even when mimetype is application/octet-stream
5+
- Fix: stop optimizing blanks, might be signed (spotted by jmc at dolorespark.org)
6+
17
20060525
28

39
- Fixed signal handler (wasn't calling exit after cleaning up!)

textmail

Lines changed: 85 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use strict;
33

44
# textmail - mail filter to replace MS Word/HTML attachments with plain text
55
#
6-
# Copyright (C) 2003-2005 raf <raf@raf.org>
6+
# Copyright (C) 2003-2007 raf <raf@raf.org>
77
#
88
# This program is free software; you can redistribute it and/or modify
99
# it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ use strict;
2020
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2121
# or visit http://www.gnu.org/copyleft/gpl.html
2222
#
23-
# 20051129 raf <raf@raf.org>
23+
# 20070803 raf <raf@raf.org>
2424

2525
=head1 NAME
2626
@@ -348,82 +348,17 @@ C<http://raf.org/minimail/>
348348
349349
=head1 AUTHOR
350350
351-
20051129 raf <raf@raf.org>
351+
20070803 raf <raf@raf.org>
352352
353353
=head1 URL
354354
355355
C<http://raf.org/textmail/>
356356
357357
=cut
358358

359-
# Doco functions: usage and manpage (via $PAGER or as nroff or html)
360-
361-
sub help
362-
{
363-
print
364-
"usage: textmail [options]\n",
365-
"options:\n",
366-
" -h - Print the help message then exit\n",
367-
" -m - Print the manpage then exit\n",
368-
" -w - Print the manpage in html format then exit\n",
369-
" -r - Print the manpage in nroff format then exit\n",
370-
" -M - Output in mailbox format\n",
371-
" -T - Output in raw mail format (for smtp)\n",
372-
" -W - Don't replace MS Word attachments with text\n",
373-
" -E - Don't replace MS Excel attachments with csv\n",
374-
" -H - Don't replace HTML attachments with text\n",
375-
" -R - Don't replace RTF attachments with text\n",
376-
" -P - Don't replace PDF attachments with text\n",
377-
" -U - Don't translate winmail.dat attachments\n",
378-
" -L - Don't reduce appledouble attachments\n",
379-
" -I - Don't delete image attachments\n",
380-
" -A - Don't delete audio attachments\n",
381-
" -V - Don't delete video attachments\n",
382-
" -X - Don't delete MS Windows executable attachments\n",
383-
" -B - Don't recode text that was base64-encoded\n",
384-
" -S - Don't replace spaces in filenames with underscores\n",
385-
" -Z - Do translate signed content (discards signatures)\n",
386-
" -O - Delete all application/octet-stream attachments\n",
387-
" -! - Delete all application/* attachments\n",
388-
" -D hdrs - Delete headers (list of header prefixes and filenames)\n",
389-
" -K types - Keep attachments (list of mimetypes and filenames)\n",
390-
" -f - On translation error, keep translation, not original\n",
391-
" -? - Print paths of helper applications then exit\n",
392-
"\n",
393-
"Filters a mail message or mbox, replacing MS Word, MS Excel, HTML, RTF and PDF\n",
394-
"attachments with the plain text contained therein. By default, the following\n",
395-
"attachments are also deleted: image, audio, video and MS Windows executables.\n",
396-
"MS winmail.dat attachments are replaced by any attachments contained therein\n",
397-
"which are then replaced by text or deleted in the same fashion. Any of these\n",
398-
"actions can be suppressed with the command line options. Mail headers can also\n",
399-
"be selectively deleted.\n";
400-
exit;
401-
}
402-
403-
sub man
404-
{
405-
my $noquotes = (`pod2man -h 2>&1` =~ /--quotes=/) ? '--quotes=none' : '';
406-
system "pod2man $noquotes $0 | nroff -man | " . ($ENV{PAGER} || 'more');
407-
exit;
408-
}
409-
410-
sub nroff
411-
{
412-
my $noquotes = (`pod2man -h 2>&1` =~ /--quotes=/) ? '--quotes=none' : '';
413-
system "pod2man $noquotes $0";
414-
exit;
415-
}
359+
# Functions from minimail: see http://raf.org/minimail/
416360

417-
sub html
418-
{
419-
system "pod2html --noindex $0";
420-
unlink glob 'pod2htm*';
421-
exit;
422-
}
423-
424-
# Minimail functions: see http://raf.org/minimail/
425-
426-
sub formail # rfc2822 + mboxrd format (see www.qmail.org/man/man5/mbox.html)
361+
sub formail # rfc2822 + mboxrd format (see http://www.qmail.org/man/man5/mbox.html)
427362
{
428363
sub mime # rfc2045, rfc2046
429364
{
@@ -449,12 +384,10 @@ sub formail # rfc2822 + mboxrd format (see www.qmail.org/man/man5/mbox.html)
449384
{
450385
if (/^--\Q$mail->{mime_boundary}\E(--)?/)
451386
{
452-
$text = substr($text, 0, -1) if substr($text, -1) eq "\n";
453-
454387
if ($state eq 'preamble')
455388
{
456-
$mail->{mime_preamble} = $text if length $text;
457389
$state = 'part';
390+
$mail->{mime_preamble} = $text if length $text;
458391
}
459392
elsif ($state eq 'part')
460393
{
@@ -532,11 +465,11 @@ sub mail2str
532465
$head .= join '', @{$mail->{headers}} if exists $mail->{headers};
533466
my $body = '';
534467
$body .= $mail->{body} if exists $mail->{body};
535-
$body .= "$mail->{mime_preamble}\n" if exists $mail->{mime_preamble};
468+
$body .= "$mail->{mime_preamble}" if exists $mail->{mime_preamble};
536469
$body .= "--$mail->{mime_boundary}\n" if exists $mail->{mime_boundary} && !exists $mail->{mime_parts};
537-
$body .= join "\n", map { "--$mail->{mime_boundary}\n" . mail2str($_) } @{$mail->{mime_parts}} if exists $mail->{mime_parts};
538-
$body .= "\n--$mail->{mime_boundary}--" if exists $mail->{mime_boundary};
539-
$body .= "\n$mail->{mime_epilogue}" if exists $mail->{mime_epilogue};
470+
$body .= join("", map { "--$mail->{mime_boundary}\n" . mail2str($_) } @{$mail->{mime_parts}}) if exists $mail->{mime_parts};
471+
$body .= "--$mail->{mime_boundary}--\n" if exists $mail->{mime_boundary};
472+
$body .= "$mail->{mime_epilogue}" if exists $mail->{mime_epilogue};
540473
$body .= mail2str($mail->{mime_message}) if exists $mail->{mime_message};
541474
$body =~ s/^(>*From )/>$1/mg, $body =~ s/([^\n])\n?\z/$1\n\n/ if exists $mail->{mbox};
542475
return $head . "\n" . $body;
@@ -753,13 +686,13 @@ sub newmail # rfc2822, rfc2045, rfc2046, rfc2183 (also rfc3282, rfc3066, rfc2424
753686
sub decode
754687
{
755688
my ($d, $e) = @_;
756-
return $e =~ /^base64$/i ? decode_base64($d) : $e =~ /^quoted-printable$/i ? decode_quoted_printable($d) : $d;
689+
return $e =~ /^base64$/i ? decode_base64($d) : $e =~ /^quoted-printable$/i ? decode_quoted_printable($d) : substr($d, 0, -1);
757690
}
758691

759692
sub encode
760693
{
761694
my ($d, $e) = @_;
762-
return $e =~ /^base64$/i ? encode_base64($d) : $e =~ /^quoted-printable$/i ? encode_quoted_printable($d) : $d;
695+
return $e =~ /^base64$/i ? encode_base64($d) : $e =~ /^quoted-printable$/i ? encode_quoted_printable($d) : $d . "\n";
763696
}
764697

765698
sub choose_encoding # rfc2822, rfc2045
@@ -928,6 +861,75 @@ sub winmail
928861
return ($badtnef) ? $m : map { newmail(%$_) } @attachment;
929862
}
930863
864+
# Doco functions: usage and manpage (via $PAGER or as nroff or html)
865+
866+
$ENV{LANG} = 'C';
867+
868+
sub help
869+
{
870+
print
871+
"usage: textmail [options]\n",
872+
"options:\n",
873+
" -h - Print the help message then exit\n",
874+
" -m - Print the manpage then exit\n",
875+
" -w - Print the manpage in html format then exit\n",
876+
" -r - Print the manpage in nroff format then exit\n",
877+
" -M - Output in mailbox format\n",
878+
" -T - Output in raw mail format (for smtp)\n",
879+
" -W - Don't replace MS Word attachments with text\n",
880+
" -E - Don't replace MS Excel attachments with csv\n",
881+
" -H - Don't replace HTML attachments with text\n",
882+
" -R - Don't replace RTF attachments with text\n",
883+
" -P - Don't replace PDF attachments with text\n",
884+
" -U - Don't translate winmail.dat attachments\n",
885+
" -L - Don't reduce appledouble attachments\n",
886+
" -I - Don't delete image attachments\n",
887+
" -A - Don't delete audio attachments\n",
888+
" -V - Don't delete video attachments\n",
889+
" -X - Don't delete MS Windows executable attachments\n",
890+
" -B - Don't recode text that was base64-encoded\n",
891+
" -S - Don't replace spaces in filenames with underscores\n",
892+
" -Z - Do translate signed content (discards signatures)\n",
893+
" -O - Delete all application/octet-stream attachments\n",
894+
" -! - Delete all application/* attachments\n",
895+
" -D hdrs - Delete headers (list of header prefixes and filenames)\n",
896+
" -K types - Keep attachments (list of mimetypes and filenames)\n",
897+
" -f - On translation error, keep translation, not original\n",
898+
" -? - Print paths of helper applications then exit\n",
899+
"\n",
900+
"Filters a mail message or mbox, replacing MS Word, MS Excel, HTML, RTF and PDF\n",
901+
"attachments with the plain text contained therein. By default, the following\n",
902+
"attachments are also deleted: image, audio, video and MS Windows executables.\n",
903+
"MS winmail.dat attachments are replaced by any attachments contained therein\n",
904+
"which are then replaced by text or deleted in the same fashion. Any of these\n",
905+
"actions can be suppressed with the command line options. Mail headers can also\n",
906+
"be selectively deleted.\n";
907+
exit;
908+
}
909+
910+
sub man
911+
{
912+
my $noquotes = (`pod2man -h 2>&1` =~ /--quotes=/) ? '--quotes=none' : '';
913+
system "pod2man $noquotes $0 | nroff -man | " . ($ENV{PAGER} || 'more');
914+
exit;
915+
}
916+
917+
sub nroff
918+
{
919+
my $noquotes = (`pod2man -h 2>&1` =~ /--quotes=/) ? '--quotes=none' : '';
920+
system "pod2man $noquotes $0";
921+
exit;
922+
}
923+
924+
sub html
925+
{
926+
system "pod2html --noindex $0";
927+
unlink glob 'pod2htm*';
928+
exit;
929+
}
930+
931+
# Parse command line
932+
931933
my %opt;
932934
use Getopt::Std;
933935
help unless getopts 'hmrwMTWEHRPLUIAVXBSZO!D:K:f?', \%opt;
@@ -980,7 +982,7 @@ if (!$removing || (($? || !defined $tmp || ! -d $tmp) && !mkdir($tmp = "/tmp/tex
980982
exit;
981983
};
982984
983-
# Filter the mail message on stdin into text on stdout
985+
# Filter mail message(s) on stdin into text on stdout
984986
985987
formail(sub { <> }, sub
986988
{
@@ -994,7 +996,7 @@ rmdir $tmp or system "rm -rf $tmp";
994996
995997
BEGIN { $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { rmdir $tmp or system "rm -rf $tmp" if defined $tmp; exit } }
996998
997-
# Print paths to help applications then exit
999+
# Print paths to helper applications then exit
9981000
9991001
sub paths
10001002
{
@@ -1030,7 +1032,7 @@ sub textmail
10301032
10311033
if ($remove_html && isa($entity, 'multipart/alternative') && @parts == 2)
10321034
{
1033-
if (isa($parts[0], 'text/plain') && isa($parts[1], 'text/html') || isa($parts[1], 'text/plain') && isa($parts[0], 'text/html'))
1035+
if (isa($parts[0], 'text/plain') && isa($parts[1], 'text/html', qr/\.html?$/i) || isa($parts[1], 'text/plain') && isa($parts[0], 'text/html', qr/\.html?$/i))
10341036
{
10351037
my $plain = $parts[isa($parts[0], 'text/plain') ? 0 : 1];
10361038
@{$plain->{headers}} = (grep(!/^content-/i, @{$entity->{headers}}), grep { /^content-/i } @{$plain->{headers}});
@@ -1090,7 +1092,7 @@ sub textmail
10901092
10911093
# Replace HTML attachments with plain text (via lynx -dump)
10921094
1093-
if ($remove_html && isa($parts[$i], 'text/html'))
1095+
if ($remove_html && isa($parts[$i], 'text/html', qr/\.html?$/i))
10941096
{
10951097
$parts[$i] = translate($parts[$i], 'html,htm', 'txt', (defined $lynx) ? "$lynx -dump -force_html" : undef);
10961098
next;

0 commit comments

Comments
 (0)