@@ -3,7 +3,7 @@ use strict;
3
3
4
4
# textmail - mail filter to replace MS Word/HTML attachments with plain text
5
5
#
6
- # Copyright (C) 2003-2005 raf <raf@raf.org>
6
+ # Copyright (C) 2003-2007 raf <raf@raf.org>
7
7
#
8
8
# This program is free software; you can redistribute it and/or modify
9
9
# it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ use strict;
20
20
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
21
# or visit http://www.gnu.org/copyleft/gpl.html
22
22
#
23
- # 20051129 raf <raf@raf.org>
23
+ # 20070803 raf <raf@raf.org>
24
24
25
25
=head1 NAME
26
26
@@ -348,82 +348,17 @@ C<http://raf.org/minimail/>
348
348
349
349
=head1 AUTHOR
350
350
351
- 20051129 raf <raf@raf.org>
351
+ 20070803 raf <raf@raf.org>
352
352
353
353
=head1 URL
354
354
355
355
C<http://raf.org/textmail/>
356
356
357
357
=cut
358
358
359
- # Doco functions: usage and manpage (via $PAGER or as nroff or html)
360
-
361
- sub help
362
- {
363
- print
364
- " usage: textmail [options]\n " ,
365
- " options:\n " ,
366
- " -h - Print the help message then exit\n " ,
367
- " -m - Print the manpage then exit\n " ,
368
- " -w - Print the manpage in html format then exit\n " ,
369
- " -r - Print the manpage in nroff format then exit\n " ,
370
- " -M - Output in mailbox format\n " ,
371
- " -T - Output in raw mail format (for smtp)\n " ,
372
- " -W - Don't replace MS Word attachments with text\n " ,
373
- " -E - Don't replace MS Excel attachments with csv\n " ,
374
- " -H - Don't replace HTML attachments with text\n " ,
375
- " -R - Don't replace RTF attachments with text\n " ,
376
- " -P - Don't replace PDF attachments with text\n " ,
377
- " -U - Don't translate winmail.dat attachments\n " ,
378
- " -L - Don't reduce appledouble attachments\n " ,
379
- " -I - Don't delete image attachments\n " ,
380
- " -A - Don't delete audio attachments\n " ,
381
- " -V - Don't delete video attachments\n " ,
382
- " -X - Don't delete MS Windows executable attachments\n " ,
383
- " -B - Don't recode text that was base64-encoded\n " ,
384
- " -S - Don't replace spaces in filenames with underscores\n " ,
385
- " -Z - Do translate signed content (discards signatures)\n " ,
386
- " -O - Delete all application/octet-stream attachments\n " ,
387
- " -! - Delete all application/* attachments\n " ,
388
- " -D hdrs - Delete headers (list of header prefixes and filenames)\n " ,
389
- " -K types - Keep attachments (list of mimetypes and filenames)\n " ,
390
- " -f - On translation error, keep translation, not original\n " ,
391
- " -? - Print paths of helper applications then exit\n " ,
392
- " \n " ,
393
- " Filters a mail message or mbox, replacing MS Word, MS Excel, HTML, RTF and PDF\n " ,
394
- " attachments with the plain text contained therein. By default, the following\n " ,
395
- " attachments are also deleted: image, audio, video and MS Windows executables.\n " ,
396
- " MS winmail.dat attachments are replaced by any attachments contained therein\n " ,
397
- " which are then replaced by text or deleted in the same fashion. Any of these\n " ,
398
- " actions can be suppressed with the command line options. Mail headers can also\n " ,
399
- " be selectively deleted.\n " ;
400
- exit ;
401
- }
402
-
403
- sub man
404
- {
405
- my $noquotes = (` pod2man -h 2>&1` =~ / --quotes=/ ) ? ' --quotes=none' : ' ' ;
406
- system " pod2man $noquotes $0 | nroff -man | " . ($ENV {PAGER } || ' more' );
407
- exit ;
408
- }
409
-
410
- sub nroff
411
- {
412
- my $noquotes = (` pod2man -h 2>&1` =~ / --quotes=/ ) ? ' --quotes=none' : ' ' ;
413
- system " pod2man $noquotes $0 " ;
414
- exit ;
415
- }
359
+ # Functions from minimail: see http://raf.org/minimail/
416
360
417
- sub html
418
- {
419
- system " pod2html --noindex $0 " ;
420
- unlink glob ' pod2htm*' ;
421
- exit ;
422
- }
423
-
424
- # Minimail functions: see http://raf.org/minimail/
425
-
426
- sub formail # rfc2822 + mboxrd format (see www.qmail.org/man/man5/mbox.html)
361
+ sub formail # rfc2822 + mboxrd format (see http://www.qmail.org/man/man5/mbox.html)
427
362
{
428
363
sub mime # rfc2045, rfc2046
429
364
{
@@ -449,12 +384,10 @@ sub formail # rfc2822 + mboxrd format (see www.qmail.org/man/man5/mbox.html)
449
384
{
450
385
if (/ ^--\Q $mail ->{mime_boundary}\E (--)?/ )
451
386
{
452
- $text = substr ($text , 0, -1) if substr ($text , -1) eq " \n " ;
453
-
454
387
if ($state eq ' preamble' )
455
388
{
456
- $mail -> {mime_preamble } = $text if length $text ;
457
389
$state = ' part' ;
390
+ $mail -> {mime_preamble } = $text if length $text ;
458
391
}
459
392
elsif ($state eq ' part' )
460
393
{
@@ -532,11 +465,11 @@ sub mail2str
532
465
$head .= join ' ' , @{$mail -> {headers }} if exists $mail -> {headers };
533
466
my $body = ' ' ;
534
467
$body .= $mail -> {body } if exists $mail -> {body };
535
- $body .= " $mail ->{mime_preamble}\n " if exists $mail -> {mime_preamble };
468
+ $body .= " $mail ->{mime_preamble}" if exists $mail -> {mime_preamble };
536
469
$body .= " --$mail ->{mime_boundary}\n " if exists $mail -> {mime_boundary } && !exists $mail -> {mime_parts };
537
- $body .= join " \n " , map { " --$mail ->{mime_boundary}\n " . mail2str($_ ) } @{$mail -> {mime_parts }} if exists $mail -> {mime_parts };
538
- $body .= " \n --$mail ->{mime_boundary}--" if exists $mail -> {mime_boundary };
539
- $body .= " \n $mail ->{mime_epilogue}" if exists $mail -> {mime_epilogue };
470
+ $body .= join ( " " , map { " --$mail ->{mime_boundary}\n " . mail2str($_ ) } @{$mail -> {mime_parts }}) if exists $mail -> {mime_parts };
471
+ $body .= " --$mail ->{mime_boundary}--\n " if exists $mail -> {mime_boundary };
472
+ $body .= " $mail ->{mime_epilogue}" if exists $mail -> {mime_epilogue };
540
473
$body .= mail2str($mail -> {mime_message }) if exists $mail -> {mime_message };
541
474
$body =~ s / ^(>*From )/ >$1 / mg , $body =~ s / ([^\n ])\n ?\z / $1 \n\n / if exists $mail -> {mbox };
542
475
return $head . " \n " . $body ;
@@ -753,13 +686,13 @@ sub newmail # rfc2822, rfc2045, rfc2046, rfc2183 (also rfc3282, rfc3066, rfc2424
753
686
# Undo a MIME Content-Transfer-Encoding.
#   $d - the body text as stored for the entity
#   $e - the transfer encoding name (compared case-insensitively)
# Returns the decoded data. base64 and quoted-printable bodies are handed to
# their decoders. Any other encoding is treated as an identity encoding: the
# text is returned minus its final newline, which encode() appends again.
sub decode
{
	my ($d, $e) = @_;
	$e = '' unless defined $e; # no encoding named: treat as an identity encoding
	return decode_base64($d) if $e =~ /^base64$/i;
	return decode_quoted_printable($d) if $e =~ /^quoted-printable$/i;
	return '' unless defined $d;
	# Strip the terminator only when one is really present: data that does
	# not end in a newline (e.g. an unterminated last line of input) must
	# not lose its last character.
	$d =~ s/\n\z//;
	return $d;
}
758
691
759
692
# Apply a MIME Content-Transfer-Encoding.
#   $d - the decoded data
#   $e - the transfer encoding name (compared case-insensitively)
# Returns the encoded text. base64 and quoted-printable data go through their
# encoders; for any other encoding the data is returned unchanged with a
# single newline appended (the counterpart of what decode() strips).
sub encode
{
	my ($d, $e) = @_;

	if ($e =~ /^base64$/i)
	{
		return encode_base64($d);
	}
	elsif ($e =~ /^quoted-printable$/i)
	{
		return encode_quoted_printable($d);
	}

	return $d . "\n";
}
764
697
765
698
sub choose_encoding # rfc2822, rfc2045
@@ -928,6 +861,75 @@ sub winmail
928
861
return ($badtnef ) ? $m : map { newmail(%$_ ) } @attachment ;
929
862
}
930
863
864
+ # Documentation functions: usage message and manpage (via $PAGER, or as nroff or html)
865
+
866
+ $ENV {LANG} = 'C';
867
+
868
# help: print the usage message (the option summary followed by a short
# description of what the filter does) with a plain print, i.e. to the
# currently selected output handle, then terminate the program.
# Takes no arguments and never returns to its caller.
+ sub help
869
+ {
870
+ print
871
+ "usage: textmail [options]\n ",
872
+ "options:\n ",
873
+ " -h - Print the help message then exit\n ",
874
+ " -m - Print the manpage then exit\n ",
875
+ " -w - Print the manpage in html format then exit\n ",
876
+ " -r - Print the manpage in nroff format then exit\n ",
877
+ " -M - Output in mailbox format\n ",
878
+ " -T - Output in raw mail format (for smtp)\n ",
879
+ " -W - Don't replace MS Word attachments with text\n ",
880
+ " -E - Don't replace MS Excel attachments with csv\n ",
881
+ " -H - Don't replace HTML attachments with text\n ",
882
+ " -R - Don't replace RTF attachments with text\n ",
883
+ " -P - Don't replace PDF attachments with text\n ",
884
+ " -U - Don't translate winmail.dat attachments\n ",
885
+ " -L - Don't reduce appledouble attachments\n ",
886
+ " -I - Don't delete image attachments\n ",
887
+ " -A - Don't delete audio attachments\n ",
888
+ " -V - Don't delete video attachments\n ",
889
+ " -X - Don't delete MS Windows executable attachments\n ",
890
+ " -B - Don't recode text that was base64-encoded\n ",
891
+ " -S - Don't replace spaces in filenames with underscores\n ",
892
+ " -Z - Do translate signed content (discards signatures)\n ",
893
+ " -O - Delete all application/octet-stream attachments\n ",
894
+ " -! - Delete all application/* attachments\n ",
895
+ " -D hdrs - Delete headers (list of header prefixes and filenames)\n ",
896
+ " -K types - Keep attachments (list of mimetypes and filenames)\n ",
897
+ " -f - On translation error, keep translation, not original\n ",
898
+ " -? - Print paths of helper applications then exit\n ",
899
+ "\n ",
900
+ "Filters a mail message or mbox, replacing MS Word, MS Excel, HTML, RTF and PDF\n ",
901
+ "attachments with the plain text contained therein. By default, the following\n ",
902
+ "attachments are also deleted: image, audio, video and MS Windows executables.\n ",
903
+ "MS winmail.dat attachments are replaced by any attachments contained therein\n ",
904
+ "which are then replaced by text or deleted in the same fashion. Any of these\n ",
905
+ "actions can be suppressed with the command line options. Mail headers can also\n ",
906
+ "be selectively deleted.\n ";
907
# exit without an argument: the process ends with status 0
+ exit;
908
+ }
909
+
910
# man: show this script's embedded POD as a manpage, then exit.
# The POD is converted with pod2man, formatted with nroff -man and piped
# into the command named by $PAGER (or "more" when $PAGER is unset or empty).
# Takes no arguments and never returns to its caller.
sub man
{
	# Pass --quotes=none only when the installed pod2man advertises --quotes= in its help text
	my $noquotes = (`pod2man -h 2>&1` =~ /--quotes=/) ? '--quotes=none' : '';
	# Single-quote the script path for the shell so that spaces or shell
	# metacharacters in $0 cannot split or alter the pipeline.
	(my $script = $0) =~ s/'/'\\''/g;
	# $PAGER is deliberately left unquoted: it is a shell command and may carry its own arguments
	my $pager = $ENV{PAGER} || 'more';
	system "pod2man $noquotes '$script' | nroff -man | $pager";
	exit;
}
916
+
917
# nroff: write this script's embedded POD as nroff (manpage) source, via
# pod2man, to standard output, then exit.
# Takes no arguments and never returns to its caller.
sub nroff
{
	# Pass --quotes=none only when the installed pod2man advertises --quotes= in its help text
	my $noquotes = (`pod2man -h 2>&1` =~ /--quotes=/) ? '--quotes=none' : '';
	# Single-quote the script path for the shell so that spaces or shell
	# metacharacters in $0 cannot split or alter the command.
	(my $script = $0) =~ s/'/'\\''/g;
	system "pod2man $noquotes '$script'";
	exit;
}
923
+
924
# html: write this script's embedded POD as HTML, via pod2html --noindex,
# to standard output, then exit.
# Takes no arguments and never returns to its caller.
sub html
{
	# Single-quote the script path for the shell so that spaces or shell
	# metacharacters in $0 cannot split or alter the command.
	(my $script = $0) =~ s/'/'\\''/g;
	system "pod2html --noindex '$script'";
	# Remove any pod2htm* files from the current directory
	# (presumably the cache files pod2html leaves behind - confirm).
	unlink glob 'pod2htm*';
	exit;
}
930
+
931
+ # Parse command line
932
+
931
933
my %opt ;
932
934
use Getopt::Std;
933
935
help unless getopts 'hmrwMTWEHRPLUIAVXBSZO!D:K:f?', \% opt;
@@ -980,7 +982,7 @@ if (!$removing || (($? || !defined $tmp || ! -d $tmp) && !mkdir($tmp = "/tmp/tex
980
982
exit;
981
983
};
982
984
983
- # Filter the mail message on stdin into text on stdout
985
+ # Filter mail message(s) on stdin into text on stdout
984
986
985
987
formail(sub { <> }, sub
986
988
{
@@ -994,7 +996,7 @@ rmdir $tmp or system "rm -rf $tmp";
994
996
995
997
BEGIN { $SIG {INT} = $SIG {QUIT} = $SIG {TERM} = sub { rmdir $tmp or system "rm -rf $tmp " if defined $tmp ; exit } }
996
998
997
- # Print paths to help applications then exit
999
+ # Print paths to helper applications then exit
998
1000
999
1001
sub paths
1000
1002
{
@@ -1030,7 +1032,7 @@ sub textmail
1030
1032
1031
1033
if ($remove_html && isa($entity , 'multipart/alternative') && @parts == 2)
1032
1034
{
1033
- if (isa($parts [0], 'text/plain') && isa($parts [1], 'text/html') || isa($parts [1], 'text/plain') && isa($parts [0], 'text/html'))
1035
+ if (isa($parts [0], 'text/plain') && isa($parts [1], 'text/html', qr/ \. html?$/i ) || isa($parts [1], 'text/plain') && isa($parts [0], 'text/html', qr/ \. html?$/i ))
1034
1036
{
1035
1037
my $plain = $parts [isa($parts [0], 'text/plain') ? 0 : 1];
1036
1038
@{$plain ->{headers}} = (grep(!/^content-/i, @{$entity ->{headers}}), grep { /^content-/i } @{$plain ->{headers}});
@@ -1090,7 +1092,7 @@ sub textmail
1090
1092
1091
1093
# Replace HTML attachments with plain text (via lynx -dump)
1092
1094
1093
- if ($remove_html && isa($parts [$i ], 'text/html'))
1095
+ if ($remove_html && isa($parts [$i ], 'text/html', qr/ \. html?$/i ))
1094
1096
{
1095
1097
$parts [$i ] = translate($parts [$i ], 'html,htm', 'txt', (defined $lynx ) ? "$lynx -dump -force_html" : undef);
1096
1098
next;
0 commit comments