@@ -20,7 +20,7 @@ use strict;
20
20
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
21
# or visit http://www.gnu.org/copyleft/gpl.html
22
22
#
23
- # 20051111 raf <raf@raf.org>
23
+ # 20051121 raf <raf@raf.org>
24
24
25
25
=head1 NAME
26
26
@@ -35,6 +35,7 @@ I<textmail> - mail filter to replace MS Word/HTML attachments with plain text
35
35
-w - Print the manpage in html format then exit
36
36
-r - Print the manpage in nroff format then exit
37
37
-M - Output in mailbox format (mboxrd)
38
+ -T - Output in raw mail format (for smtp)
38
39
-W - Don't replace MS Word attachments with text
39
40
-E - Don't replace MS Excel attachments with csv
40
41
-H - Don't replace HTML attachments with text
@@ -47,7 +48,8 @@ I<textmail> - mail filter to replace MS Word/HTML attachments with plain text
47
48
-V - Don't delete video attachments
48
49
-X - Don't delete MS Windows executable attachments
49
50
-B - Don't recode text that was base64-encoded
50
- -S ' ' - Replace spaces in filenames with ' ' (default is '_')
51
+ -S - Don't replace spaces in filenames with underscores
52
+ -Z - Do translate signed content (discards signatures)
51
53
-O - Delete all application/octet-stream attachments
52
54
-! - Delete all application/* attachments
53
55
-D hdrs - Delete headers (list of header prefixes and filenames)
@@ -102,13 +104,19 @@ manpage with a command like:
102
104
103
105
=item C<-M >
104
106
105
- This option adds a mailbox C<From > line at the top if there isn't one
106
- already and ensures that there is a blank line at the bottom of the output.
107
- It also performs mailbox quoting on any lines in the body that look like
108
- mailbox C<From > headers. Only use this when the output is to be stored
109
- directly in a mailbox file. It is not necessary when the output is to be
110
- sent to an SMTP server or when I<textmail > is being used as a mail filter by
111
- I<procmail(1) > .
107
+ This option causes the output to be in mboxrd format by adding a mailbox
108
+ C<From > line at the top if there isn't one already and ensuring that there is
109
+ a blank line at the bottom of the output. It also performs mailbox quoting
110
+ on any lines in the body that look like mailbox C<From > headers. Use this
111
+ when the output is to be stored directly in a mailbox file. It is not
112
+ necessary when I<textmail> is being used as a mail filter by I<procmail(1)>.
113
+
114
+ =item C<-T >
115
+
116
+ This option causes the output to be in raw mail format by removing any
117
+ mailbox C<From > line and by not performing mailbox quoting. Use this when
118
+ the output is to be sent directly to an SMTP server. It is not necessary
119
+ when I<textmail> is being used as a mail filter by I<procmail(1)>.
112
120
113
121
=item C<-W >
114
122
@@ -191,14 +199,22 @@ appropriate. This option suppresses this recoding. Note that if the text is
191
199
large enough and contains a high enough proportion of non-ASCII characters,
192
200
it will remain C<base64 > -encoded to minimise space.
193
201
194
- =item C<-S > I<' ' >
202
+ =item C<-S >
203
+
204
+ When translating attachments, I<textmail > replaces bad filename characters
205
+ such as space characters with the underscore character. This option causes
206
+ underscore characters to subsequently be converted into space characters. In
207
+ other words, you can use this option to preserve space characters in
208
+ attachment filenames (other bad filename characters will then be converted
209
+ to spaces as well).
210
+
211
+ =item C<-Z >
195
212
196
- When translating files, I<textmail > replaces bad characters such as space
197
- characters with the underscore character. This option lets you specify a
198
- character other than underscore to which bad filename characters will be
199
- converted. In other words, you can use this option to preserve space
200
- characters in attachment filenames (other bad filename characters will then
201
- be converted to spaces as well).
213
+ By default, I<textmail > will not translate C<multipart/signed > attachments.
214
+ This option causes C<multipart/signed > attachments to be replaced by the
215
+ signed attachment contained therein, discarding the signature control data.
216
+ The no-longer-signed data is then translated to text as normal. Note that
217
+ C<multipart/encrypted > attachments are never translated.
202
218
203
219
=item C<-O >
204
220
@@ -278,7 +294,7 @@ doesn't translate the attachments contained therein into text and doesn't
278
294
delete windows executables (with output in mailbox format):
279
295
280
296
:0 fw
281
- | textmail -MWEHRPLIAVX
297
+ | textmail -MWEHRPLIAVXS
282
298
283
299
=head1 REQUIREMENTS
284
300
@@ -307,8 +323,6 @@ to do nothing (i.e. C<-WEHRPULIAVX>), then it degenerates into I<cat(1)>.
307
323
308
324
=head1 CAVEAT
309
325
310
- Mail messages that are signed or encrypted are not translated.
311
-
312
326
The latest version of I<xls2csv(1) > at the time of writing (i.e.
313
327
catdoc-0.93.3) loses data.
314
328
@@ -333,7 +347,7 @@ C<http://raf.org/minimail/>
333
347
334
348
=head1 AUTHOR
335
349
336
- 20051111 raf <raf@raf.org>
350
+ 20051121 raf <raf@raf.org>
337
351
338
352
=head1 URL
339
353
@@ -353,6 +367,7 @@ sub help
353
367
" -w - Print the manpage in html format then exit\n " ,
354
368
" -r - Print the manpage in nroff format then exit\n " ,
355
369
" -M - Output in mailbox format\n " ,
370
+ " -T - Output in raw mail format (for smtp)\n " ,
356
371
" -W - Don't replace MS Word attachments with text\n " ,
357
372
" -E - Don't replace MS Excel attachments with csv\n " ,
358
373
" -H - Don't replace HTML attachments with text\n " ,
@@ -365,7 +380,8 @@ sub help
365
380
" -V - Don't delete video attachments\n " ,
366
381
" -X - Don't delete MS Windows executable attachments\n " ,
367
382
" -B - Don't recode text that was base64-encoded\n " ,
368
- " -S ' ' - Replace spaces in filenames with ' ' (default is '_')\n " ,
383
+ " -S - Don't replace spaces in filenames with underscores\n " ,
384
+ " -Z - Do translate signed content (discards signatures)\n " ,
369
385
" -O - Delete all application/octet-stream attachments\n " ,
370
386
" -! - Delete all application/* attachments\n " ,
371
387
" -D hdrs - Delete headers (list of header prefixes and filenames)\n " ,
@@ -727,6 +743,7 @@ sub newmail # rfc2822, rfc2045, rfc2046, rfc2183 (also rfc3282, rfc3066, rfc2424
727
743
($m -> {mime_type }, $m -> {mime_boundary }, $m -> {mime_parts }) = ($type =~ / ^\s *([\w\/ .-]+)/ , $bound , $a {parts } || []) if $multi ;
728
744
($m -> {mime_type }, $m -> {mime_message }) = ($type =~ / ^\s *([\w\/ .-]+)/ , $a {message } || {}) if $msg ;
729
745
$m -> {body } = encode($a {body } || ' ' , $enc ) unless $multi || $msg ;
746
+ $m -> {mbox } = $a {mbox } if exists $a {mbox } && defined $a {mbox } && length $a {mbox };
730
747
return $m ;
731
748
}
732
749
@@ -918,14 +935,17 @@ sub winmail
918
935
919
936
my %opt ;
920
937
use Getopt::Std;
921
- help unless getopts 'hmrwMWEHRPLUIAVXBS:O !D:K:f?', \% opt;
938
+ help unless getopts 'hmrwMTWEHRPLUIAVXBSZO !D:K:f?', \% opt;
922
939
help if exists $opt {h};
923
940
man if exists $opt {m};
924
941
nroff if exists $opt {r};
925
942
html if exists $opt {w};
926
943
my $mailbox = exists $opt {M};
944
+ my $raw = exists $opt {T};
945
+ die "textmail: The -M and -T options are incompatible\n " if $mailbox && $raw ;
927
946
my $catdoc = find('catdoc');
928
- my $antiword = find('antiword') || $catdoc ;
947
+ my $antiword = find('antiword');
948
+ $antiword = $antiword ? $catdoc ? "$antiword |$catdoc " : $antiword : $catdoc ;
929
949
my $xls2csv = find('xls2csv');
930
950
my $lynx = find('lynx');
931
951
my $pdftotext = find('pdftotext');
@@ -945,14 +965,15 @@ my $remove_audio = ! exists $opt{A};
945
965
my $remove_video = ! exists $opt {V};
946
966
my $remove_exe = ! exists $opt {X};
947
967
my $recode_base64_text = ! exists $opt {B};
948
- my $replace_space = $opt {S} if exists $opt {S};
968
+ my $replace_space = ' ' if exists $opt {S};
969
+ my $remove_signed = exists $opt {Z};
949
970
my $remove_octet = exists $opt {O};
950
971
my $remove_application = exists $opt {'!'};
951
972
my $remove_headers = exists $opt {D};
952
973
my @headers = get_file($opt {D}) if $remove_headers ;
953
974
my $keep_attachments = exists $opt {K};
954
975
my @keep = get_file($opt {K}) if $keep_attachments ;
955
- my $removing = $remove_word || $remove_excel || $remove_html || $remove_rtf || $remove_pdf || $remove_tnef || $remove_apple || $remove_images || $remove_audio || $remove_video || $remove_exe || $recode_base64_text || $remove_octet || $remove_application || $remove_headers ;
976
+ my $removing = $remove_word || $remove_excel || $remove_html || $remove_rtf || $remove_pdf || $remove_tnef || $remove_apple || $remove_images || $remove_audio || $remove_video || $remove_exe || $recode_base64_text || $remove_signed || $ remove_octet || $remove_application || $remove_headers || $mailbox || $raw ;
956
977
chop(my $tmp = ` $mktemp -dq /tmp/textmail.XXXXXX` ) if $removing && defined $mktemp ;
957
978
if (!$removing || (($? || !defined $tmp || ! -d $tmp ) && !mkdir($tmp = "/tmp/textmail.$$ ", 0700)))
958
979
{
@@ -967,6 +988,7 @@ formail(sub { <> }, sub
967
988
{
968
989
my $m = mail2singlepart(textmail(mail2multipart(shift)));
969
990
delete_header($m , qr/(?:content-length|lines)/i);
991
+ delete $m ->{mbox} if $raw ;
970
992
print mail2str($mailbox ? mail2mbox($m ) : $m );
971
993
});
972
994
@@ -992,10 +1014,12 @@ sub textmail
992
1014
my $entity = shift;
993
1015
my $isapart = shift || 0;
994
1016
my @parts = @{parts($entity )};
1017
+ my $mbox = $entity ->{mbox} if exists $entity ->{mbox};
995
1018
996
- # Do nothing if this is encrypted or signed
1019
+ # Do nothing if this is encrypted (or signed unless -Z)
997
1020
998
- return $entity if isa($entity , qr/multipart\/ (?:signed|encrypted)/i);
1021
+ return $entity if isa($entity , qr/multipart\/ encrypted/i);
1022
+ return $entity if !$remove_signed && isa($entity , qr/multipart\/ signed/i);
999
1023
1000
1024
# Remove headers
1001
1025
@@ -1011,6 +1035,7 @@ sub textmail
1011
1035
my $plain = $parts [isa($parts [0], 'text/plain') ? 0 : 1];
1012
1036
@{$plain ->{headers}} = (grep(!/^content-/i, @{$entity ->{headers}}), grep { /^content-/i } @{$plain ->{headers}});
1013
1037
%{$plain ->{header}} = (map { ($_ , $entity ->{header}->{$_ }) } grep { !/^content-/i } keys %{$entity ->{header}}), (map { ($_ , $plain ->{header}->{$_ }) } grep { /^content-/i } keys %{$plain ->{header}});
1038
+ $plain ->{mbox} = $mbox if defined $mbox ;
1014
1039
return debase64($plain );
1015
1040
}
1016
1041
}
@@ -1024,10 +1049,25 @@ sub textmail
1024
1049
my $data = $parts [1];
1025
1050
@{$data ->{headers}} = (grep(!/^content-/i, @{$entity ->{headers}}), grep { /^content-/i } @{$data ->{headers}});
1026
1051
%{$data ->{header}} = (map { ($_ , $entity ->{header}->{$_ }) } grep { !/^content-/i } keys %{$entity ->{header}}), (map { ($_ , $data ->{header}->{$_ }) } grep { /^content-/i } keys %{$data ->{header}});
1052
+ $data ->{mbox} = $mbox if defined $mbox ;
1027
1053
return mail2singlepart(textmail(mail2multipart($parts [1]), 0));
1028
1054
}
1029
1055
}
1030
1056
1057
+ # Reduce signed attachments to just the signed data attachment
1058
+
1059
+ if ($remove_signed && isa($entity , 'multipart/signed') && @parts == 2)
1060
+ {
1061
+ if (isa($parts [1], param($entity , 'content-type', 'protocol')))
1062
+ {
1063
+ my $data = $parts [0];
1064
+ @{$data ->{headers}} = (grep(!/^content-/i, @{$entity ->{headers}}), grep { /^content-/i } @{$data ->{headers}});
1065
+ %{$data ->{header}} = (map { ($_ , $entity ->{header}->{$_ }) } grep { !/^content-/i } keys %{$entity ->{header}}), (map { ($_ , $data ->{header}->{$_ }) } grep { /^content-/i } keys %{$data ->{header}});
1066
+ $data ->{mbox} = $mbox if defined $mbox ;
1067
+ return mail2singlepart(textmail(mail2multipart($parts [0]), 0));
1068
+ }
1069
+ }
1070
+
1031
1071
# Process parts
1032
1072
1033
1073
for (my $i = 0; $i < @parts ; ++$i )
@@ -1164,7 +1204,7 @@ sub translate
1164
1204
return newmail(filename => $textpath , body => '') if !defined $cmd && $force ;
1165
1205
my $origdata = body($part );
1166
1206
open A, ">$tmp /$origpath " and do { print A $origdata ; close A };
1167
- my $failed = $origpath ne $textpath && system($cmd . ' ' . quotemeta("$tmp /$origpath ") . ' > ' . quotemeta("$tmp /$textpath ")) || -s "$tmp /$origpath " && -z "$tmp /$textpath ";
1207
+ my $failed ; $failed = $origpath ne $textpath && system($_ . ' ' . quotemeta("$tmp /$origpath ") . ' > ' . quotemeta("$tmp /$textpath ")) || -s "$tmp /$origpath " && -z "$tmp /$textpath " or last for split / \| /, $cmd ;
1168
1208
unlink "$tmp /$origpath " unless $origpath eq $textpath ;
1169
1209
unlink("$tmp /$textpath "), return $part if $failed && !$force ;
1170
1210
$part = newmail(filename => "$tmp /$textpath "); unlink "$tmp /$textpath ";
@@ -1181,7 +1221,8 @@ sub debase64
1181
1221
return $entity unless $type =~ /^text\/ /i && encoding($entity ) =~ /^base64$/i;
1182
1222
my $body = body($entity ); $body =~ tr/\r //d;
1183
1223
my $name = filename($entity );
1184
- return newmail(type => $type , body => $body , (defined $name ? (name => $name ) : ()));
1224
+ my $mbox = $entity ->{mbox} if exists $entity ->{mbox};
1225
+ return newmail(type => $type , body => $body , (defined $name ? (name => $name ) : ()), (defined $mbox ? (mbox => $mbox ) : ()));
1185
1226
}
1186
1227
1187
1228
# Parse a data file
0 commit comments